Steroid Data Analysis

Pauli Tikka

2025-01-23

Introduction

# Welcome to the 'steroid data analysis' webpage! 

# The procedures and explanations to make all the analysis and plots are in their individual chapters below. 
# These methods could be also easily applied to other types of data sets and metabolites than 'steroids' and their respective metadata per se. 
# In addition, there is a small 'disclaimer' also at the end of this webpage to emphasize that this site is mainly for educational purposes.
# Please let me know if you have any questions. For that, use the 'following' email: patati at the university of Turku

Loading Required R Packages

# echo=FALSE is too good
# Set library paths if needed
# .libPaths(c("C:/Program Files/R/R-4.4.1/library", .libPaths()))

# List of libraries to load (alphabetically sorted)
packages <- c("bigsnpr", "binilib", "brickster", "car", "censReg", "circlize", "ComplexHeatmap", "correlation", 
              "corrplot", "daiR", "datarium", "dmetar", "dplyr", "effsize", "extrafont", "forcats", "fs", "FSA", 
              "ggcorrplot", "ggforce", "ggforestplot", "ggplot2", "ggplotify", "ggpubr", "ggsankey", "ggsankeyfier", 
              "ggh4x", "ggtext", "glmnet", "grid", "Hmisc", "hrbrthemes", "igraph", "insight", "lavaan", "lmtest", 
              "lme4", "lsr", "magick", "magrittr", "Maaslin2", "mdatools", "mediation", "meta", "mgcv", "mlma", 
              "MOFA2", "pheatmap", "PerformanceAnalytics", "pathviewr", "plyr", "plotrix", "ppcor", "prettydoc", 
              "psych", "quantreg", "qpgraph", "ragg", "RColorBrewer", "rcompanion", "readxl", "remotes", "reshape2", 
              "rgl", "rmarkdown", "rmdformats", "rstatix", "scales", "scater", "scatterplot3d", "sjPlot", "stringr", 
              "superb", "tibble", "tidyverse", "tint", "tufte", "viridis", "xlsx")

# Load all libraries
invisible(lapply(packages, library, character.only = TRUE))

# Note: Do not load 'forestplot' as it conflicts with 'ggforestplot'

# Install packages if not already installed
# renv::install() # Installs from the basic R repository
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install(c("ComplexHeatmap", "DESeq2", "dmetar", "fgsea", "ggforestplot", "ggsankey", "limma", "Maaslin2", "metagenomeSeq", "MOFA2", "qpgraph", "scater", "scRNAseq", "sevenbridges"))
# remotes::install_github(c("davidsjoberg/ggsankey", "fossbert/binilib", "MathiasHarrer/dmetar", "mattflor/chorddiag", "NightingaleHealth/ggforestplot"))
# devtools::install_github("mattflor/chorddiag") # Alternative installation method

Importing Data and Metadata

#First set your data folder:
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")# or:"C:/Users/patati/Desktop/Turku/R" #check the wd with: here::here() #or getwd()
load("thereal.RData") #This is so to say real data, and it is not available here (at the site).

Setting Global Variables

options(scipen = 999) # Disable scientific notation
# rm(list = ls()) # Clear workspace; this should not be if you have the load above
thedate <- strftime(Sys.Date(), "%d%m%y") #Do not take the old date from the load...
date <- paste0('tikka', thedate) # Customize this as needed

# Example installation commands
# remotes::install_github("fossbert/binilib", force=TRUE)
# install.packages(c('tidyverse', 'tibble'))
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install("Maaslin2")
# devtools::install_github("davidsjoberg/ggforestplot")
# remotes::install_version("insight", version = "0.20.5", repos = "http://cran.us.r-project.org", force=TRUE)
# font_import() # Import fonts if not already done
# loadfonts(device = "win") # Load fonts for Windows
# renv::status() # Check renv status
# library(rmarkdown); render("path/to/file.Rmd") # Render R Markdown document
# remove.packages("DelayedArray")
# BiocManager::install("DelayedArray")
# install.packages("Require") # Install 'Require' package

Making Boxplots

#https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
#https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender

boxplots <- function(tvt, Group, Outcome, Out, oute, other) {
  # Filter data based on gender
  tvt <- tvt %>%
    filter(if (Group == 'Male') Gender == 1 else if (Group == 'Female') Gender == 0 else TRUE)
  
  # Prepare data for plotting
  Steroid <- rep(colnames(tvt[, 9:28]), each = nrow(tvt))
  data2 <- rep('Control', nrow(tvt))
  num <- ifelse(Outcome == 'HOMA-IR', 1.5, min(tvt[[Outcome]]))
  data2[tvt[[Outcome]] > num] <- 'Case'
  Treatment <- data2
  Concentration <- as.vector(unlist(tvt[, 9:28]))
  data <- data.frame(Steroid, Treatment, Concentration)
  data$Group <- 0
  
  # Correct steroid names if the level exists
  if ("17aOH-P4" %in% levels(data$Steroid)) {
    data <- data %>%
      mutate(Steroid = fct_recode(Steroid, '17a-OHP4' = '17aOH-P4'))
  }
  
  # Assign groups
  rownames(groups2) <- 1:20
  for (i in seq_len(nrow(groups2))) {
    data[data$Steroid %in% groups2$Abbreviation[i], 'Group'] <- groups2$Group[i]
  }
  
  # Set plot title
  title <- paste(Out, "'s Effect on Concentrations of Steroids in ", Group, sep = "")
  
  # Define legend labels
  e1 <- ifelse(num == 1.5, paste('Case (>', num, ')', sep = ""), paste('Case (>', 0, ')', sep = ""))
  e2 <- ifelse(num == 1.5, paste('Control (<=', num, ')', sep = ""), paste('Control (=', 0, ')', sep = ""))
  
  # Remove rows with NA concentrations
  data <- data %>% filter(!is.na(Concentration))
  
  # Create boxplot
  p <- ggplot(data, aes(x = Steroid, y = Concentration, fill = Treatment)) +
    geom_boxplot(notch = FALSE, notchwidth = 0.5, outlier.shape = 1, outlier.size = 2, coef = 1.5) +
    theme_classic2() +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2, size = 10.5),
          axis.text = element_text(color = "black"),
          panel.grid.minor = element_blank(),
          text = element_text(size = 10.5, family = "Calibri"),
          axis.title = element_text(size = 14),
          plot.title = element_text(size = 14),
          legend.text = element_text(size = 14),
          legend.title = element_text(size = 14)) +
    labs(x = "Steroids", y = "Log2 of Picomolar Concentrations", title = title) +
    scale_fill_manual(values = c("orange", "blue"), name = oute, labels = c(e1, e2)) +
    facet_grid(~Group, scales = "free_x", space = "free") +
    stat_compare_means(hide.ns = TRUE, label = "p.signif", method = "wilcox.test",
                       symnum.args = list(cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                                          symbols = c("****", "***", "**", "*", "ns")),
                       size = 8, paired = FALSE, label.y = 15.5)
  
  # Save plot as PNG
  path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
  pngfile <- fs::path(path, paste0(Group, Out, 'boxe', ".png"))
  knitr::include_graphics(pngfile)
}

# Example usage
tv_half_log22[, '11-KA4'][tv_half_log22[, '11-KA4'] == min(tv_half_log22[, '11-KA4'])] <- median(tv_half_log22[, '11-KA4'])
other <- '261124'
ie <- tv_half_log22
hupo <- match(colnames(ie)[colnames(ie) %in% groups2[, 3]], groups2[, 3])
colnames(ie)[colnames(ie) %in% groups2[, 3]] <- groups2[hupo, 2]
windowsFonts(A = windowsFont("Calibri (Body)"))

# The significance levels are: '****<0.001', '***<0.01', '**<0.05', '*<0.1'
Outcome='Steatosis Grade';Out='Steatosis'; oute='Steatosis Grade';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Outcome='Fibrosis Stage';Out='Fibrosis'; oute='Fibrosis Stage';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other) 

# https://www.elsevier.es/en-revista-annals-hepatology-16-articulo-assessment-hepatic-fibrosis-necroinflammation-among-S1665268119314590 #So it is in grade
Outcome='Necroinflammation';Out='Necroinflammation'; oute='Necroinflammation Grade';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Outcome='HOMA-IR';Out='HOMA-IR'; oute='HOMA-IR';num=1.5 ;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Making Forest Plots

# Define the NAFLD dataset by selecting the first 28 columns from tv
NAFLD <- tv[, 1:28]
# Convert specific columns to binary values using vectorized operations
cols_to_binary <- c(5, 6, 7)
NAFLD[, cols_to_binary] <- (NAFLD[, cols_to_binary] > 0) * 1
# Convert column 8 to binary based on the threshold of 1.5
NAFLD[, 8] <- (NAFLD[, 8] > 1.5) * 1
# Clean column names to remove special characters and make them consistent
patterns <- c("-", "/", "11", "17", "#")
replacements <- c(".", ".", "X11", "X17", ".") #?
# Ensure patterns and replacements are correctly paired
if (length(patterns) == length(replacements)) {
  for (i in seq_along(patterns)) {
    colnames(NAFLD) <- gsub(patterns[i], replacements[i], colnames(NAFLD))}} else {
      stop("Patterns and replacements vectors must be of the same length.")}


# This works with the autoscaled (raw if loge=1 and remove 1 in the means) data NAFLD as well...
pre_errors_2=function(NAFLD,Outcome,Group,name,ordera,oute,first,e,xlim) { # Group='Female'
  
  # Filter data based on the 'Group' variable
  NAFLDo <- switch(Group,
                   "Male" = NAFLD[NAFLD[,'SEX.1F.2M'] == 2, ],
                   "Female" = NAFLD[NAFLD[,'SEX.1F.2M'] == 1, ],
                   "All" = NAFLD)
  
  # Initialize vectors to store sample data and counts
  sample_data <- list()
  n0 <- n1 <- 0

  # Loop through the two groups (Outcome == 0 and Outcome > 0)
  for (i in 1:2) {
    SG0 <- if (i == 1) {
      NAFLDo[NAFLDo[, Outcome] == 0, ]
    } else {
      NAFLDo[NAFLDo[, Outcome] > 0, ]}
    
    # Store the count of samples in each group
    if (i == 1) {
      n0 <- nrow(SG0)
    } else {
      n1 <- nrow(SG0)}
  
  # Calculate medians and standard deviations for columns 9 to 28
  means <- apply(SG0[, 9:28], 2, median, na.rm = TRUE)
  sds <- apply(SG0[, 9:28], 2, sd, na.rm = TRUE)
  
  # Calculate error margins
  error_lower <- means - sds
  error_upper <- means + sds
  error <- sds
  
  # Append results to sample_data
  sample_data[[i]] <- data.frame(study = colnames(NAFLD[, 9:28]),
                                 index = colnames(NAFLD[, 9:28]),
                                 result = means,
                                 error = error)}
  df=data.frame(sample_data) #
  
  # Calculate p-values using Wilcoxon test for columns 9 to 28
  ps <- sapply(9:28, function(j) {
    xnam <- colnames(NAFLDo)[j]
    fmla <- as.formula(paste(xnam, "~", Outcome))
    wilcox.test(fmla, data = NAFLDo, exact = FALSE)$p.value})
    #https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
  
  # Calculate the ratio of results and log-transform
  a <- df[df[, 1] == e, 'result.1'] / df[df[, 1] == e, 'result']
  v2 <- data.frame(log(df$result.1 / df$result))
  
  # Rename columns and clean up variable names
  v2$result <- v2[, 1]
  v2$name <- df$study
  v2 <- v2[, 2:3]
  v2$name <- gsub("\\.", "-", v2$name)
  v2$name <- gsub("X11", "11", v2$name)
  v2$name <- gsub("X17", "17", v2$name)
  v2$name[v2$name == "T-Epi-T"] <- "T/Epi-T"
  v2$pval <- ps

  # Calculate result_pure and error
  v2$result_pure <- df$result.1 / df$result
  v2$error <- (abs((1 / df$result) * df$error.1) + abs((df$result.1 / df$result^2) * df$error)) / nrow(NAFLDo) * 1.64
  
  # Adjust error values
  v2$error <- ifelse(v2$error > (median(v2$error) + sd(v2$error)), median(v2$error) * 1.25, v2$error)
  
  # Calculate error bounds and log-transformed values
  v2$errord1a <- v2$result_pure - v2$error
  v2$errord2a <- v2$result_pure + v2$error
  v2$errord1 <- log(v2$errord1a)
  v2$errord2 <- log(v2$errord2a)
  v2$result <- log(v2$result_pure)
  v2$Control <- df$result
  v2$Case <- df$result.1
  
  # Add p-values and significance
  v2$pval0 <- v2$pval
  v2$pval1 <- v2$pval
  v2$Significance0 <- ifelse(v2$pval0 < 0.1, 'Yes', 'No')
  v2$Color0 <- ifelse(v2$pval0 < 0.1, 'blue', 'grey')
  v2$Significance1 <- ifelse(v2$pval1 < 0.1, 'Yes', 'No')
  v2$Color1 <- ifelse(v2$pval1 < 0.1, 'blue', 'grey')
  
  # Merge with group data and sort
  gn <- groups[groups$Abbreviation != 'F', c('Group', 'Abbreviation')]
  gn <- gn[order(gn$Abbreviation), ]
  v2 <- v2[order(v2$name), ]
  v2 <- cbind(v2, gn[order(gn$Abbreviation), ])
  v2 <- v2[order(-v2$result), ]

  xlab = "Autoscaled Concentrations (SE)" #xlab = "Raw Concentrations in Log10 Scale (SE)"}
  xlim=c(min(v2$errord1),max(v2$errord2)) 
  #Occasionally: xlim=c(min(v2$result)*1.1,max(v2$result)*1.1) # if (xlim[2]>1) {xlim[2]=1};# if (xlim[1] < -0.75) {xlim[1]=-0.75};
  
 # Create forest plot
  plote2 <- forestplot(df = v2,
                       estimate = result,
                       se = 0,
                       pvalue = pval1,
                       psignif = 0.1,
                       xlim = xlim,
                       xlab = 'Logged Ratio between Raw Concentrations of Case and Control with 90% CI',
                       ylab = 'Steroid Groups',
                       title = '',
                       colour = Significance1) +
    ggforce::facet_col(facets = ~Group, scales = "free_y", space = "free", strip.position = 'left') +
    geom_errorbarh(aes(xmin = errord1, xmax = errord2, height = .0, colour = Significance1))
  
  # Set color palette
  hp <- if (sum(v2$Significance1 == 'Yes') == 20) c('blue', 'blue') else c('#999999', 'blue')
  
  # Order factor levels based on Group and first
  if (Group=='All' & first==TRUE) {ordera=rev(groups$Abbreviation)#v2$name[order(v2$result)]; #
  plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='All' & first==FALSE) {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='Female') {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='Male') {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)}
  #https://www.r-bloggers.com/2020/03/how-to-standardize-group-colors-in-data-visualizations-in-r/
  plote2$layers[[1]]$aes_params$odd <- "#00000000" #https://stackoverflow.com/questions/71745719/how-to-control-stripe-transparency-using-ggforestplot-geom-stripes
  
  v2$Group2=v2$Group
  v2 <- transform(v2,Group2 = as.numeric(as.factor(Group2)))
  v2$facet_fill_color <- c("red", "green", "blue", "yellow", "brown")[v2$Group2]
  
  # Create plot with custom themes
  jopon <- plote2 + theme(axis.text.y = element_blank()) + theme_classic2()
  jopon2 <- jopon + geom_point(aes(colour = factor(Significance1)), colour = v2$Color1) +
    scale_color_manual(values = hp) + theme(legend.position = "none") + theme(strip.text.y = element_text(size = -Inf))
  
  # Customize facet strip colors
  g <- ggplot_gtable(ggplot_build(jopon2))
  stripr <- which(grepl('strip-l', g$layout$name))
  fills <- c("red", "green", "blue", "yellow", "brown")
  for (i in seq_along(stripr)) {
    j <- which(grepl('rect', g$grobs[[stripr[i]]]$grobs[[1]]$childrenOrder))
    g$grobs[[stripr[i]]]$grobs[[1]]$children[[j]]$gp$fill <- fills[i]}
  # grid::grid.draw(g)
  
  # Save plot as JPEG and convert to PDF and SVG
  jpeg(paste(name, "divi.jpg"), width = 7500, height = 11000, quality = 100, pointsize = 16, res = 1000)
  print(grid::grid.draw(g))
  dev.off()
  
  daiR::image_to_pdf(paste(name, "divi.jpg"), pdf_name = paste0(paste(name, "divi.jpg"), '.pdf'))
  my_image <- image_read(paste(name, "divi.jpg"))
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(name, "divi.svg"))
  
  return(ordera) #If you do not want to have 'null' to the Rmarkdown/html take this away
  } 


# This is with first(!!). Use it. 
Outcome='Steatosis.Grade.0.To.3';Out='Steatosis'; oute='Steatosis';first=TRUE; e='P4';ordera=c();
Group='All';name1=paste("Forest plot of",Group, "Steroid Ratios in",Out);
hel=pre_errors_2(NAFLD,Outcome,Group,name1,ordera,oute,first,e,xlim)
# #Afterwards:
first=FALSE;
Group='Female';name2=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name2,ordera=hel,oute,first,e,xlim)
Group='Male'; name3=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name3,ordera=hel,oute,first,e,xlim)
# 
Outcome='Fibrosis.Stage.0.to.4'; Out='Fibrosis';oute='Fibrosis';
Group='All'; name4=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name4,ordera=hel,oute,first,e,xlim)
Group='Female';name5=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name5,ordera=hel,oute,first,e,xlim)
Group='Male'; name6=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name6,ordera=hel,oute,first,e,xlim)
# 
Outcome='Necroinflammation'; Out='Necroinflammation';oute='Necroinflammation';
Group='All'; name7=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name7,ordera=hel,oute,first,e,xlim) #not the very first though...
Group='Female';name8=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name8,ordera=hel,oute,first,e,xlim)
Group='Male'; name9=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name9,ordera=hel,oute,first,e,xlim)
# 
Outcome='HOMA.IR';Out='HOMA-IR';oute='HOMAIR';
Group='All';name10=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name10,ordera=hel,oute,first,e,xlim) #not the very first though...
Group='Female';name11=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name11,ordera=hel,oute,first,e,xlim)
Group='Male'; name12=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name12,ordera=hel,oute,first,e,xlim)
# Fyi: I was able to revise some of the above codes with Copilot...
Forest plot of All Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Making Chord Diagrams

# First the correlations for the chord diagrams (both male and female as well as total subjects):

# Copy tvauxe to tv_c
tv_c = tvauxe

# Match column names in tv_c with the third column in groups2 and get their indices
hupo = match(colnames(tv_c)[colnames(tv_c) %in% groups2[, 3]], groups2[, 3])
# Replace matched column names in tv_c with corresponding values from the second column in groups2
colnames(tv_c)[colnames(tv_c) %in% groups2[, 3]] = groups2[hupo, 2]
ok = colnames(tv_c)

# Convert tv_c to a data frame
tv_c = data.frame(tv_c)
# Remove specific columns from tv_c
tv_c = tv_c[, !colnames(tv_c) %in% c('Total_TG', 'PFAS', "Perfluorodecyl.ethanoic.acid")]

# Separate data by gender
tvf = tv_c[tv_c[, 'Gender'] == min(tv_c[, 'Gender']), 1:dim(tv_c)[2]]
tvm = tv_c[tv_c[, 'Gender'] == max(tv_c[, 'Gender']), 1:dim(tv_c)[2]]

# Create a list of data frames for total, female, and male subjects
tvtest = list(tv_c, tvf, tvm)

# Clean up column names in each data frame in tvtest
for (i in 1:3) {
  colnames(tvtest[[i]]) <- gsub("\\.", "-", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X11", "11", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X17", "17", colnames(tvtest[[i]]))
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "T-Epi-T"] = "T/Epi-T"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Steatosis-Grade"] = "Steatosis Grade"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Fibrosis-Stage"] = "Fibrosis Stage"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "17aOH-P4"] = "17a-OHP4"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "HOMA IR"] = "HOMA-IR"
}

# Assign cleaned data frames back to tv_c, tvf, and tvm
tv_c = tvtest[[1]]
tvf = tvtest[[2]]
tvm = tvtest[[3]]

# Rename specific value in x4
x4[x4 == "X7.oxo.DCA_S"] = "X7-oxo-DCA_S"

# Calculate Spearman correlations for total subjects
dat = tv_c
dat = dat %>% select(-c('PatientNumber'))  # Remove 'PatientNumber' column
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r  # Calculate correlation matrix
colnames(resulta) = ok[2:dim(tv_c)[2]]
rownames(resulta) = ok[2:dim(tv_c)[2]]

# Calculate Spearman correlations for female subjects
dat = tvf
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultaf) = ok[3:dim(tv_c)[2]]
rownames(resultaf) = ok[3:dim(tv_c)[2]]

# Calculate Spearman correlations for male subjects
dat = tvm
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultam) = ok[3:dim(tv_c)[2]]
rownames(resultam) = ok[3:dim(tv_c)[2]]

# Define column groups for different variables
at = colnames(resulta)[1:(length(x1) - 1)]  # Clinicals
bt = colnames(resulta)[(length(at) + 1):(length(at) + length(x2))]  # Steroids
ct = colnames(resulta)[(length(at) + length(bt) + 1):(length(at) + length(bt) + length(x3))]  # BA_l
dt = colnames(resulta)[(length(at) + length(bt) + length(ct) + 1):(length(at) + length(bt) + length(ct) + length(x4))]  # BA_s
et = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(x5))]  # PFAS
ft = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + length(et) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(et) + length(x6))]  #

# Store lengths of each group
atl = length(at)
btl = length(bt)
ctl = length(ct)
dtl = length(dt)
etl = length(et)
ftl = length(ft)

# Set significance level for correlations
n_level = 0.01

# Filter correlations based on significance level for total subjects
Nrr = qpNrr(resulta, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resulta)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resulta = tes_t

# Filter correlations based on significance level for female subjects
Nrr = qpNrr(resultaf, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultaf)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resultaf = tes_t

# Filter correlations based on significance level for male subjects
Nrr = qpNrr(resultam, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultam)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resultam = tes_t

# Rename 'Gender' column to 'Sex(F-M+)' in correlation matrices
colnames(resulta)[colnames(resulta) == 'Gender'] <- 'Sex(F-M+)'
rownames(resulta)[rownames(resulta) == 'Gender'] <- 'Sex(F-M+)'

# Update column and row names in correlation matrices
colnames(resulta)[2:dim(resulta)[2]] = ok[3:dim(tv_c)[2]]
rownames(resulta)[2:dim(resulta)[2]] = ok[3:dim(tv_c)[2]]
colnames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(tv_c)[2]]
rownames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(tv_c)[2]]
colnames(resultam)[2:dim(resultam)[2]] = ok[4:dim(tv_c)[2]]
rownames(resultam)[2:dim(resultam)[2]] = ok[4:dim(tv_c)[2]]

# Function to create chord diagrams for different groups
group_chords <- function(vars, n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f) {
  classes <- 5
  tot <- rownames(resulta)[2:dim(resulta)[1]]
  range <- 1:(a + b + c + e + f)
  layout(matrix(1:1, 1, 1))
  title <- 'Sex'
  genders <- gend
  windowsFonts(A = windowsFont("Calibri (Body)"))
  i <- 1
  tes_t <- vars    
  
  # Set column and row names based on gender
    if (gend == 'All') {
    colnames(tes_t) <- rownames(resulta)
    rownames(tes_t) <- rownames(resulta) } else {
    colnames(tes_t) <- rownames(resultaf)
    rownames(tes_t) <- rownames(resultaf)}
  

  
  # Define groups for different variables
  g1 <- c(rep('Clinical', a), rep('Steroids', b), rep('BA_liver', c), rep('Contaminants', e), rep('Lipids', f))
  
  # Remove self-correlations within each group
  tes_t[1:a, 1:a] <- 0
  tes_t[(a + 1):(a + b), (a + 1):(a + b)] <- 0
  tes_t[(a + b + 1):(a + b + c), (a + b + 1):(a + b + c)] <- 0
  tes_t[(a + b + c + 1):(a + b + c + e), (a + b + c + 1):(a + b + c + e)] <- 0
  tes_t[(a + b + c + e + 1):(a + b + c + e + f), (a + b + c + e + 1):(a + b + c + e + f)] <- 0
  
  # Define group structure and color palette
  group <- structure(g1, names = colnames(tes_t))
  grid.col <- structure(c(rep('#93c29f', a), rep('#a83277', b), rep('red', c), rep('grey', e), rep('black', f)),
                        names = rownames(tes_t))
  
  # Filter and adjust correlation matrix
  tes_t <- tes_t[range, range]
  grid.col <- grid.col[range]
  g <- graph.adjacency(tes_t, mode = "upper", weighted = TRUE, diag = FALSE)
  e <- get.edgelist(g)
  df <- as.data.frame(cbind(e, E(g)$weight))
  df[, 3] <- as.numeric(df[, 3])
  
  # Define color function for edges
  col_fun <- colorRamp2(c(-1, 0, 1), c("blue", 'white', "orange"), transparency = 0.25)
  
  # Remove specified elements from the data frame
  df <- df[!df$V1 %in% rem, ]
  df <- df[!df$V2 %in% rem, ]
  
  # Define legends for the plot
  lgd_group <- Legend(at = gend, type = "points", legend_gp = gpar(col = colors), title_position = "topleft", title = title)
  lgd_points <- Legend(at = unique(g1), type = "points", legend_gp = gpar(col = unique(grid.col)), title_position = "topleft", title = "Class")
  lgd_lines <- Legend(at = c("Positive", "Negative"), type = "points", legend_gp = gpar(col = c('orange', 'blue')), title_position = "topleft", title = "Correlation")
  lgd_edges <- Legend(at = c(-1, 1), col_fun = col_fun, title_position = "topleft", title = "Edges")
  lgd_list_vertical <- packLegend(lgd_group, lgd_points, lgd_lines, lgd_edges)
  
  # Set parameters for the chord diagram
  circos.par(gap.after = 1.5, start.degree = 90)
  chordDiagram(df, annotationTrack = c("grid"), grid.col = grid.col, directional = FALSE, symmetric = TRUE, scale = FALSE,
               link.lwd = 0.3, link.border = "white", order = rownames(tes_t), preAllocateTracks = 1, col = col_fun, transparency = 0.25, big.gap = 10, small.gap = 1)
  
  # Add text and axis to the plot
  circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
    xlim <- get.cell.meta.data("xlim")
    ylim <- get.cell.meta.data("ylim")
    sector.name <- get.cell.meta.data("sector.index")
    circos.text(mean(xlim), ylim[1] + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
    circos.axis(h = "top", labels.cex = 0.000001, major.tick.length = 0.2, sector.index = sector.name, track.index = 2)
  }, bg.border = NA)
  
  # Set font and draw legends
  windowsFonts(A = windowsFont("Calibri (Body)"))
  draw(lgd_list_vertical, x = unit(5, "mm"), y = unit(5, "mm"), just = c("left", "bottom"))
  
  # Save the plot as a JPEG file
  dev.copy(jpeg, paste0(gend, n_level, 'hiee.jpg'), width = 12, height = 12, units = "in", res = 1000)
  dev.off()
  
  # Include the plot in the report and convert to PDF and SVG
  knitr::include_graphics(paste0(gend, n_level, 'hiee.jpg'))
  daiR::image_to_pdf(paste0(gend, n_level, 'hiee.jpg'), pdf_name = paste0(paste0(gend, n_level, 'hie'), '.pdf'))
  my_image <- image_read(paste0(gend, n_level, 'hiee.jpg'))
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(paste0(gend, n_level, 'hie.jpg'), ".svg"))
}

# All variables
n_level = 0.01
circos.clear()
vars = list(resulta)
big = 'Yes'
title = 'All Variables'
rem = x4
modi = 5
colt = 'black'
a = length(x1) - 1
b = length(x2)
c = length(x3)
d = length(x4)
e = length(x5)
f = length(x6)
gend = c('All')
colors = c('blue')
group_chords(vars[[1]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

# Genderwise:
vars = list(resultaf, resultam)
big = 'No'
title = 'Genders Separated'
rem = x4
modi = 4
colt = 'black'
colors = c('white', 'black')
a = length(x1) - 2
b = length(x2)
c = length(x3)
d = length(x4)
e = length(x5)
f = length(x6)
gend = c('Female')
colors = c('white')
group_chords(vars[[1]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

gend = c('Male')
colors = c('black')
group_chords(vars[[2]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

#Copiloting is not working very well here, so I just let it comment some of the lines ...

Making Variance Explained Plots

# Some info regarding of making the data:
# This is it! https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
#
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
varex_groups_plot <- function(tv_all, Group) {
  # Initialize error flag
  an.error.occured <- FALSE
  tv_all2 <- tv_all
  
  # Check if 'Gender' column exists
  tryCatch({
    tv_all2[, 'Gender']
  }, error = function(e) {
    an.error.occured <<- TRUE
  })
  
  # Determine condition based on Group and presence of 'Gender' column
  cond <- if (an.error.occured) {
    if (Group == 'female') {
      tv_all2[, 'SEX.1F.2M'] == min(tv_all2[, 'SEX.1F.2M'])
    } else if (Group == 'male') {
      tv_all2[, 'SEX.1F.2M'] == max(tv_all2[, 'SEX.1F.2M'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  } else {
    if (Group == 'female') {
      tv_all2[, 'Gender'] == min(tv_all2[, 'Gender'])
    } else if (Group == 'male') {
      tv_all2[, 'Gender'] == max(tv_all2[, 'Gender'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  }
  
  # Filter data based on condition
  tv_red <- tv_all2[cond, ]
  hep <- tv_red
  colnames(hep)[1:8] <- colnames(tv_red)[1:8]
  
  # Transpose the data for SingleCellExperiment
  tv2 <- t(hep[, 9:ncol(tv_red)])
  
  # Create SingleCellExperiment object
  sce <- SingleCellExperiment(tv2)
  logcounts(sce) <- tv2
  sce@colData <- DataFrame(hep[, 2:8])
  colnames(colData(sce)) <- colnames(tv_all)[2:8]
  colnames(colData(sce))[1] <- 'The Gender'

  # Calculate variance explained
  vars <- getVarianceExplained(sce, variables = colnames(colData(sce))[1:7])
  colVars(vars)

  # Set font and color palette
  windowsFonts(A = windowsFont("Calibri (Body)"))
  mypalette <- scales::hue_pal()(ncol(colData(sce)))
  names(mypalette) <- colnames(tv_all)[2:8]
  
  # Adjust vars if Group is not 'All'
  if (Group != 'All') {
    vars <- vars[, 2:7]
  }
  
  # Plot explanatory variables
  p <- plotExplanatoryVariables(vars) +
    theme(text = element_text(size = 25, family = "Calibri")) +
    theme(axis.text = element_text(size = 20, family = "Calibri"))
  
  # Clean up plot data
  p[[1]] <- p[[1]][!is.na(p[[1]][, 1]), ]
  p[[1]][, 1] <- as.vector(unlist(p[[1]][, 1]))
  p[[1]] <- p[[1]][order(p[[1]][, 1]), ]

  # Save plot as PNG
  path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
  sips <- paste0(Group, 'vex', ".png")
  pngfile <- fs::path(path, sips)
  agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300, scaling = 2)
  plot(p)
  invisible(dev.off())
  
  # Include plot in report
  knitr::include_graphics(pngfile)
  
  # Convert image to PDF and SVG
  daiR::image_to_pdf(sips, pdf_name = paste0(sips, '.pdf'))
  my_image <- image_read(sips)
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(sips, ".svg"))
  p
  
}
# Example usage:
varex_groups_plot(tv_all, Group = 'All')

varex_groups_plot(tv_all, Group = 'female')

varex_groups_plot(tv_all, Group = 'male')

#Copilot helped with the commenting here.

Making Heatmap with Effect Sizes

# Function to calculate Cohen's d effect sizes
# https://www.statology.org/cohens-d-in-r/
cohd <- function(NAFLD, tv, Group, Outcome) {
  
  # Filter data based on gender
  if (Group == 'Male') {
    NAFLDo <- NAFLD[NAFLD[, 'Gender'] == max(NAFLD[, 'Gender']), ]
    tva <- tv[tv[, 'SEX.1F.2M'] == max(tv[, 'SEX.1F.2M']), ]
  } else if (Group == 'Female') {
    NAFLDo <- NAFLD[NAFLD[, 'Gender'] == min(NAFLD[, 'Gender']), ]
    tva <- tv[tv[, 'SEX.1F.2M'] == min(tv[, 'SEX.1F.2M']), ]
  } else {
    NAFLDo <- NAFLD
    tva <- tv}
  
  # Check if the Outcome column exists
  if (!Outcome %in% colnames(NAFLDo)) {
    stop("The specified Outcome column does not exist in the data frame.")}
  
  # Filter data based on outcome
  if (Outcome != 'HOMA-IR') {
    SG0 <- NAFLDo[NAFLDo[, Outcome] == min(NAFLDo[, Outcome]), ]
    SG1 <- NAFLDo[NAFLDo[, Outcome] > min(NAFLDo[, Outcome]), ]
  } else {
    SG0 <- NAFLDo[tva[, 'HOMA-IR'] <= 1.5, ]
    SG1 <- NAFLDo[tva[, 'HOMA-IR'] > 1.5, ]}
  
  # Initialize vector to store Cohen's d values
  cd <- numeric(20)
  
  # Calculate Cohen's d for each variable
  for (i in 1:20) {
    group1 <- SG0[, i + 8]
    group2 <- SG1[, i + 8]
    
    data <- data.frame(
      value = c(group1, group2),
      group = factor(rep(c("group1", "group2"), c(length(group1), length(group2)))))
    
    result <- cohen.d(value ~ group, data = data)
    cd[i] <- result$cohen.d[2]}
  
  return(cd)}

# Initialize an empty vector
d <- c()

# Define the dataset
NAFLD <- tv_all

# Function to calculate Cohen's d for different groups and outcomes
calculate_cohd <- function(outcome) {
  Group <- 'All'
  a <- cohd(NAFLD, tv, Group, outcome)
  Group <- 'Female'
  b <- cohd(NAFLD, tv, Group, outcome)
  Group <- 'Male'
  c <- cohd(NAFLD, tv, Group, outcome)
  cbind(a, b, c)}

# Calculate Cohen's d for different outcomes and combine results
outcomes <- c('Steatosis Grade', 'Fibrosis Stage', 'Necroinflammation', 'HOMA-IR')
for (outcome in outcomes) {
  d <- cbind(d, calculate_cohd(outcome))}

# Set row names
rownames(d) <- colnames(tv_all[, 9:28])

# Create column names with groups and outcomes
colnames(d) <- unlist(lapply(outcomes, function(outcome) {
  paste(c('All', 'Female', 'Male'), outcome, sep = "_")}))

# Save the results to a CSV file
write.csv(d, paste0('cohens_da_', thedate, '.csv'))

# Convert the results to a data frame
n <- d
x <- data.frame(n)
row.names(x) <- colnames(tv_all[, 9:28])

# Adjust group names
groups <- groups
groups[groups == "17a-OHP4"] <- "17aOH-P4"
op <- groups[order(groups$Group), 'Abbreviation']
op <- op[op %in% row.names(x)]
x <- x[op, ]

# Function to create breaks for the heatmap
brks_heatmap <- function(mat, color_palette) {
  rng <- range(mat, na.rm = TRUE)
  lpal <- length(color_palette)
  c(seq(rng[1], 0, length.out = ceiling(lpal / 2) + 1),
    seq(rng[2] / dim(mat)[1], rng[2], length.out = floor(lpal / 2)))}

# Define color palette
color_palette <- colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)

# Save heatmap as JPEG
jpeg(paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)

# Set viewport for the heatmap
setHook("grid.newpage", function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))), action = "prepend")

# Generate heatmap
pheatmap(x, cluster_cols = FALSE, cluster_rows = FALSE, breaks = brks_heatmap(x, color_palette), color = color_palette, column_names_side = "bottom", angle_col = 90)

# Reset viewport hook
setHook("grid.newpage", NULL, "replace")

# Add text annotations
grid.text("Steatosis, Fibrosis, Necroinflammation, HOMA-IR", y = -0.07, x = 0.4, gp = gpar(fontsize = 16))
grid.text("Steroids (Androgens, Estrogens, Gluc., Mineraloc., Progestogens)", 
          x = -0.07, rot = 90, gp = gpar(fontsize = 16))

# Save the plot again for quality reasons
dev.copy(jpeg, paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)
dev.off()

# Include the image in the document
knitr::include_graphics(paste0("cohensd_e2", date, ".jpg"));dev.off()

# Convert image to PDF
daiR::image_to_pdf(paste0("cohensd_e2", date, ".jpg"), pdf_name = paste0("cohensd_e2", date, ".pdf"));#dev.off()

# Convert image to SVG
my_image <- image_read(paste0("cohensd_e2", date, ".jpg"))
my_svg <- image_convert(my_image, format = "svg")
image_write(my_svg, paste0("cohensd_e2", date, ".svg"))

# Fyi: Revising with Copilot the above codes works partially...

Making Heatmaps with Correlations

#The correlations (ok):
#Correlation matrices:
#http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
#squares are good for individual associations, because the order is the same
# More info regarding the function:
# https://jokergoo.github.io/circlize_book/book/legends.html
# https://cran.r-project.org/web/packages/ggplotify/vignettes/ggplotify.html
# https://bioinfo4all.wordpress.com/2021/03/13/tutorial-7-how-to-do-chord-diagram-using-r/
# https://jokergoo.github.io/circlize_book/book/advanced-usage-of-chorddiagram.html
# https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html

tv_c=tv_covscl#tv_half_log22 #cbind(tv[,1:8], tv_half_log2) #check also not logged and then the auto one
tv_c=tv_c[,c(1:3,4:(dim(tv_c)[2]))]#dim(tv_c)[2],
tv_c=tv_c[,!colnames(tv_c) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
tv_c=tv_c[,!colnames(tv_c) %in% x4]
colnames(tv_c)[colnames(tv_c)=="17aOH-P4"]="17a-OHP4"
tvf=tv_c[tv_c[,'Gender']==min(tv_c[,'Gender']),1:dim(tv_c)[2]] #tv['Steatosis.Grade.0.To.3'==0,9:27]] #tv[tv[,'Necroinflammation']==0,9:80]; #SG0i=as.numeric(SG0i); check also: tv[tv[,'HOMA-IR']==0,9:80]
tvm=tv_c[tv_c[,'Gender']==max(tv_c[,'Gender']),1:dim(tv_c)[2]]


# rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
dat = tv_c; 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson # intersect(colnames(resulta), rownames(resulta)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.a=rcorr(as.matrix(dat), type = c('spearman'))$P; 
p.mat.a[is.na(p.mat.a)]=1; 
p.mat.aa=matrix(p.adjust(p.mat.a,method="BH"),nrow=dim(p.mat.a)[1],ncol=dim(p.mat.a)[2]); 
rownames(p.mat.aa)=rownames(p.mat.a);colnames(p.mat.aa)=colnames(p.mat.a)
# write.csv(resulta,'MASLD_steroid_study correlations with spearman_log_tikka12424.csv')
# resulta=dat
dat=tvf; # 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson# intersect(colnames(resultaf), rownames(resultaf)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.f=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.f[is.na(p.mat.f)]=1; 
p.mat.ff=matrix(p.adjust(p.mat.f,method="BH"),nrow=dim(p.mat.f)[1],ncol=dim(p.mat.f)[2]); 
rownames(p.mat.ff)=rownames(p.mat.f);colnames(p.mat.ff)=colnames(p.mat.f)
# write.csv(resultaf,'MASLD_steroid_study female correlations with spearman_log_tikka12424.csv')
dat=tvm;  # 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #dat= dat %>% select(-c('Gender')) #this is quite nice way to delete columns, please remember...
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson # intersect(colnames(resultam), rownames(resultam)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.m=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.m[is.na(p.mat.m)]=1; 
p.mat.mm=matrix(p.adjust(p.mat.m,method="BH"),nrow=dim(p.mat.m)[1],ncol=dim(p.mat.m)[2]); 
rownames(p.mat.mm)=rownames(p.mat.m);colnames(p.mat.mm)=colnames(p.mat.m)
# write.csv(resultam,'MASLD_steroid_study male correlations with spearman_log_tikka12424.csv')
resulta[resulta==1]=0
resultam[resultam==1]=0
resultaf[resultaf==1]=0;#min(resultaf);max(resultaf);
n_level=1



# https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
order="original" #alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
range='orig';corre='re_renormae'; method='color' #color square
jpeg(paste("Correlations with Full Plot of All_vok_nes",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resulta, type = "lower", order = order,method=method, tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #,is.corr = FALSE
dev.off();
eoh=paste("Correlations with Full Plot of All_vok_nes",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resulta, type = "lower", order = order,method=method, tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #,is.corr = FALSE

jpeg(paste("Correlations with Full Plot of Female_voek",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resultaf, type = "lower", order = order,method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE)
dev.off();
eoh=paste("Correlations with Full Plot of Female_voek",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resultaf, type = "lower", order = order,method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE)

jpeg(paste("Correlations with Full Plot of Male_voeka",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resultam, type = "lower", order = order, method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #order = "alphabet",  order = "hclust",
dev.off();
eoh=paste("Correlations with Full Plot of Male_voeka",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resultam, type = "lower", order = order, method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #order = "alphabet",  order = "hclust",

#The ok ones:
x1=colnames(resulta)[c(1:6)]
x1=c(x1[3:6],x1[1],x1[2])#x1[2],
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']
colnames(resulta)[colnames(resulta)=="17aOH-P4"]="17a-OHP4"
colnames(p.mat.a)[colnames(p.mat.a)=="17aOH-P4"]="17a-OHP4"
x2[x2=="17aOH-P4"]="17a-OHP4"
x2=x2[order(match(x2,groups[,2]))] #https://stackoverflow.com/questions/1568511/how-do-i-sort-one-vector-based-on-values-of-another
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']
#
# resulta1=resulta[c(x1,x2),x5];p.mat.a1=p.mat.aa[c(x1,x2),x5]
# resulta2=resultaf[c(x1,x2),x5];p.mat.f1=p.mat.ff[c(x1,x2),x5]
# resulta3=resultam[c(x1,x2),x5];p.mat.m1=p.mat.mm[c(x1,x2),x5]
# # tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
# hip1='transposesa_kaikki scale';width = 2400;height=6000;pch.cex=1.2;
# ho='PFAS vs. clinical factors and steroids'
# resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3)
# p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1)
# width = 6000;height=2800;
#
resulta1=resulta[c(x3,x6),x5];p.mat.a1=p.mat.aa[c(x3,x6),x5]
resulta2=resultaf[c(x3,x6),x5];p.mat.f1=p.mat.ff[c(x3,x6),x5]
resulta3=resultam[c(x3,x6),x5];p.mat.m1=p.mat.mm[c(x3,x6),x5]
# tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
hip1='transpose';width = 2400;height=6000;pch.cex=1.2;ho='PFAS vs. BAs and lipids'
resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3)
p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1)
width = 9000;height=2800;

resulta1[resulta1 >0.4] = 0.4
resulta1[resulta1 < -0.4] = -0.4

resulta2[resulta2 >0.4] = 0.4
resulta2[resulta2 < -0.4] = -0.4

resulta3[resulta3 >0.4] = 0.4
resulta3[resulta3 < -0.4] = -0.4

# hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
# resulta1[resulta1 > 1]  = 1
# resulta1[resulta1 < -1] = -1 #col.lim=c(-0.4,0.4))
#
# #https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# #https://statisticsglobe.com/change-font-size-corrplot-r
# #order can be: alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
#
order="original"; range='orig';corre='no_rendorm'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)
cl.offset=1.0;cl.length=5;cl.cex = 1.3;pch.cex=1.3;pch=20;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
jpeg(paste("Square Correlation Plot ofdd",ho,ga,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);# par( ps=ps)# par(cex.lab=90)
corrplot(resulta1, type = type, order = order,method=method, p.mat=p.mat.a1, tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
sig.level = c(.001,.05, .2),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE, col.lim=c(-0.4,0.4) ) #only in age...0.001,
dev.off();eoh=paste("Square Correlation Plot ofdd",ho,ga,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
# pch.cex=1.3;
jpeg(paste("Square Correlation Plot ofd",ho,gf,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);
corrplot(resulta2, type = type, order = order,method=method, p.mat=p.mat.f1,tl.col = "black",
         cl.cex = cl.cex,  pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .05, .2), cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.4,0.4)) #
dev.off();eoh=paste("Square Correlation Plot ofd",ho,gf,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
# pch.cex=2.9;
jpeg(paste("Square Correlation Plot ofd",ho,gm,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);
corrplot(resulta3, type = type, order = order,method=method, p.mat=p.mat.m1, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,
         pch.col='black',pch=pch,
sig.level = c(.001, .05, .2),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.4,0.4)) #,is.corr = FALSE
dev.off();eoh=paste("Square Correlation Plot ofd",ho,gm,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
#
#
resulta1=resulta[c(x1,x3,x6),x2];  p.mat.a1=p.mat.aa[c(x1,x3,x6),x2]
resulta2=resultaf[c(x1,x3,x6),x2]; p.mat.f1=p.mat.ff[c(x1,x3,x6),x2]
resulta3=resultam[c(x1,x3,x6),x2]; p.mat.m1=p.mat.mm[c(x1,x3,x6),x2]
# tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
hip1='transpose'; width = 3700;height=6300;ho='steroids vs. all others except PFAS';ps=28 #pch=10;
# min(c(resulta2)); max(c(resulta2)) #These are around -0.4 and 0.4
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)

# path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path)
resulta1[resulta1 >0.5] = 0.5
resulta1[resulta1 < -0.5] = -0.5

resulta2[resulta2 >0.5] = 0.5
resulta2[resulta2 < -0.5] = -0.5

resulta3[resulta3 >0.5] = 0.5
resulta3[resulta3 < -0.5] = -0.5
#
#
# resulta1=resulta[c(x3,x6),x2];  p.mat.a1=p.mat.aa[c(x3,x6),x2]
# resulta2=resultaf[c(x3,x6),x2]; p.mat.f1=p.mat.ff[c(x3,x6),x2]
# resulta3=resultam[c(x3,x6),x2]; p.mat.m1=p.mat.mm[c(x3,x6),x2]
# # tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
# resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3);p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1);
#
# # resulta1[resulta1 > 0.25] = 0.4
# # resulta1[resulta1 < -0.25] = -0.4
# hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
#
hip1='transpose'; width = 3200;height=2000;ho='steroids vs. all others except PFAS';ps=12 #pch=10;
min(c(resulta2)); max(c(resulta2)) #These are around -0.4 and 0.4
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)

# resulta1=resulta1[,groups[,'Abbreviation']];resulta2=resulta2[,groups[,'Abbreviation']];resulta3=resulta3[,groups[,'Abbreviation']]

order="original"; range='orig';corre='no_renorm'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
cl.offset=1.0;cl.length=5;cl.cex = 1.0;pch.cex=1.0;pch=20;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
jpeg(paste("Square Correlation Plot ofrra",ho,ga,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90)
corrplot(resulta1, type = type, order = order,method=method, p.mat=p.mat.a1, tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
sig.level = c(.001,.05, .2),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.5,0.5)) #only in age...0.001, is.corr = TRUE/FALSE rev(COL2('RdBu')[1:(length(COL2('RdBu'))-0)])
dev.off();eoh=paste("Square Correlation Plot ofrr",ho,ga,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

pch.cex=1.3;
jpeg(paste("Square Correlation Plot ofa",ho,gf,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
corrplot(resulta2, type = type, order = order,method=method, p.mat=p.mat.f1,tl.col = "black",
         cl.cex = cl.cex,  pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .05, .2), cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,,is.corr = FALSE,col.lim=c(-0.5,0.5)) #
dev.off();eoh=paste("Square Correlation Plot of",ho,gf,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

# pch.cex=2.9;
jpeg(paste("Square Correlation Plot ofa",ho,gm,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
corrplot(resulta3, type = type, order = order,method=method, p.mat=p.mat.m1, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,
         pch.col='black',pch=pch,
sig.level = c(.001, .05, .2),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,,is.corr = FALSE,col.lim=c(-0.5,0.5)) #,is.corr = FALSE
dev.off();eoh=paste("Square Correlation Plot of",ho,gm,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

#Tiedoks: Copilotilla ei järkee edelliseen, vaikka näyttää hieman siistimmältä.

Making Heatmaps with Linear Model Estimates

# You may need a rather big function to calculate the estimates and plot at the same time, since the spaces of exper. interest have been reduced from the max dataset size.

# Eli maksimilla vedetään... eli pitäis olla ok, sillä skaalattu vastaa korrelaatiota tss. skaalaus vielä miehiin...
the_funal=function(tv,Group,ok,fn,adj,sig.level,sick,sick_group,joo) { #  ok,aa,bb
  tv=tv_covscl
  if (Group=='male') {NAFLDo=tv[tv[,'Gender']==max(tv[,'Gender']),]} else if (Group=='female') 
  {NAFLDo=tv[tv[,'Gender']==min(tv[,'Gender']),]} else if (Group=='All') {NAFLDo=tv}
  SG0=NAFLDo[,c(2:dim(tv)[2])]
  #https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
  oknames=colnames(SG0)
  SG0=data.frame(SG0)
  colnames(SG0)
  colnames(SG0[,8:27]) <- gsub("-", ".", colnames(SG0[,8:27]))
  colnames(SG0[,8:27]) <- gsub("/", ".", colnames(SG0[,8:27]))
  hesh=c()
  
  Treatment=colnames(tv_all)[52:58];
  Mediator=colnames(tv_all)[9:28];
  Outcome=colnames(tv_all)[c(29:51,59:71)];
  
  xnam <- colnames(SG0)[c(4:7)]
  Treatment2=Treatment
  y <- Treatment2 
  hoesh=c()
  
  j=1;i=1;p.val=c()
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All') 
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] #https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])

      jeps=SG0# 
      r=as.numeric(hoesh[4]) #This was previuosly 'hösh', but for some reason it started to not work. Suom. tää oli aikasemmin 'hösh', mutta sitten tuli ongelmia. Hi hii.
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)
     
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))
    }}
  

  j=1;i=1; rm(xnam,y)
  xnam <- colnames(SG0)[c(2)]
  y <- Treatment2#colnames(SG0)[c(70:76)]
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All') 
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # Some pondering what to put as p values:
      # uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      # p.val <- pf(uh[1], df1 = uh[2], df2 = uh[3],lower.tail=F)
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
      
      jeps=SG0# 
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)
      
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
  
  j=1;i=1; rm(xnam,y)
  xnam <- colnames(SG0)[c(3)];y <- Treatment2#colnames(SG0)[c(70:76)]
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All') 
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} 
      #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
      uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
      jeps=SG0# 
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)
      
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
  

  j=1;i=1; rm(xnam,y)
  xnam <- Treatment2
  y = colnames(SG0[,8:27])
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All') 
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) 
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # 
      uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
      jeps=SG0# 
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}

  # # 1)  steroids=BA/lipid
  
  # if (Group!='All') {
  j=1;i=1; rm(xnam,y)
  xnam <- c(x3,x6) # Group='All'Treatment2
  y <- c(colnames(SG0[,8:27])); #colnames(SG0)[c(4:7)]
  
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      # j=10
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();
      p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
      uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      p.vala <- pf(uh[1], df1 = uh[2], df2 = uh[3],lower.tail=F)
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])

      jeps=SG0# 
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)

      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}

  # ja 2) steroid = covar
  j=1;i=1; rm(xnam,y)
  xnam <- c('AGE','BMI',colnames(SG0)[c(4:7)]); 
  y <- c(colnames(SG0[,8:27])) # Group='All'
  
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All') 
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])

      jeps=SG0# 
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)
      
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}

  # ja 2) steroid = covar
  j=1;i=1; rm(xnam,y)
  xnam <- c(x3,x6) # Group='All'Treatment2
  y <- colnames(tv_all)[52:58];

  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      if (Group!='All')  {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
      {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
      poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
      p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
      ps=summary(poissone);
      pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
      hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])

      jeps=SG0#
      r=as.numeric(hoesh[4])
      p=as.numeric(hoesh[5])
      rsadj=as.numeric(hoesh[6])
      colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
      Treatment=hoesh[2]
      Mediator=hoesh[1]
      rm(hoesh)

      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
  
  
    
  hesa=hesh
  hoi=as.data.frame(hesh)
  hopiu=hoi
  colnames(hopiu)=c('y','x','Gender','r','p','var_x')
  colnames(hoi)=c('y','x','Gender','r','p','var_x')
  
  #This in case you want to print to your local computer: ... :)
  # main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn),collapse="")
  main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//"),collapse="")
  setwd(main_dir)
  
  meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
  covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
  
  if (adj=='ok') {
    # p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
    hoi[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
    hopiu[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
  }
  
  if (ok=='big') {
    
    rsa=c();joi=c()
    
    # Eli 'kaksi' vielä tarvitaan...
    # 1) BA/lipid=covar ja steroidit, ja 2) steroid=covar

    meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
    covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
    
    c1=hoi[,2] %in% covas
    c2=hoi[,1] %in% meds
    hyy=c1 & c2
    m1=hoi[hyy,]
    colnames(m1)=c('y','x','Gender','r','p','var_x') #c('y','x','Gender','r','p','radj')
    c1=hoi[,2] %in% c(x3,x6); c2=hoi[,1] %in% meds
    hyy=c1 & c2; m2=hoi[hyy,]
    colnames(m2)=c('y','x','Gender','r','p','var_x') # hist(as.numeric(m2[,6]),breaks=50)
    joi=rbind(m1,m2)
    i=4;rs=c()
    
    for (i in 4:5) {
      rs=joi[,c(1,2,i)] # rs=data.frame(rs)
      rs=reshape(rs,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]

      library(stringr)
      colnames(rs)=str_sub(colnames(rs),3,-1)
      
      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      
      rownames(rs)[rownames(rs)=="Steatosis.Grade"]="Steatosis Grade"
      rownames(rs)[rownames(rs)=="Fibrosis.Stage"]="Fibrosis Stage"
      rownames(rs)[rownames(rs)=="HOMA.IR"]="HOMA-IR"
      covas[covas=="Steatosis.Grade"]="Steatosis Grade"
      covas[covas=="Fibrosis.Stage"]="Fibrosis Stage"
      covas[covas=="HOMA.IR"]="HOMA-IR"
      
      heps=c(groups[,2]) #check that you have driven the steroid data vis file...
      heps[heps=="17aOH-P4"]="17a-OHP4"
      cme1=match(heps,colnames(rs))
      cme2=match(c(covas,x3,x6),rownames(rs))
      rs=rs[cme2,cme1]
      rsa=rbind(rsa,rs) }
    
    
    rs1a=rsa[1:dim(rs)[1],];
    rs2a=rsa[(dim(rs1a)[1]+1):(dim(rs1a)[1]+dim(rs1a)[1]),]
  
    rs1=rs1a;rs2=rs2a
    rownames(rs2)=str_sub(rownames(rs2), end = -2)
    rownames(rs1) <- gsub("\\.", " ", rownames(rs1))
    rownames(rs2) <- gsub("\\.", " ", rownames(rs2))
    rownames(rs1)[rownames(rs1)=="HOMA IR"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rownames(rs1)[rownames(rs1)=="Gender"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check this if needed
  
    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
        rs1[rs1>0.5]=0.5;rs1[rs1 < -0.5]=-0.5;

    
    width=2500; height=4400
    order="original"; range='orig';corre='no_renorm'; type='full'; method='color';#ga='All';gf='Female';gm='Male' #color square
    cl.offset=1.0;cl.length=15;cl.cex = 1.09;pch.cex=1.09;pch=11;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
    ho=Group;hip1='BAs_lipids_as_y vs. steroids_as_x'

    # https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
    # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
    # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

#I have driven these separately for html: 
    # for that you need:     
    jpeg(paste("Linear Model Estimate Plot ofees5",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);

    hepio=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)])
    
    
    
    corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch, ,sig.level = c(.001, .01, .05),cl.pos = cl.pos, 
             insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,tl.cex=0.5, tl.srt = 90, diag = TRUE, #tl.pos='n'
             col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100) ,is.corr = FALSE,col.lim=c(-0.5,0.5));   #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
    
    dev.off()  #,is.corr = FALSE
    eoh=paste("Linear Model Estimate Plot ofees5",hip1,Group,".jpg")
    daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
    my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

    #

    #https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
    #https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
   #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2

    
  } else {
    
    rsa=c();rs1=c();rs2=c()
  
    c1=hoi[,1] %in% x5
    hoi[c1,] # This gives you the PFAS (x5) ok. Tää tulee suoraan tällä PFAS 7:lle ekalle
    c2=hoi[,1] %in% names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)] #vaikeemman kautta
    hyy=c1 & c2
    hoi2=hoi[c2 | c1 ,] #Likewise...
    rownames(hoi2)=1:dim(hoi2)[1]
    hoi2=hoi2[1:182,]
    a=hoi2[1:42,1];b=hoi2[1:42,2]
    hoi2[1:42,]=cbind(b,a,hoi2[1:42,3:5])    
    hoi2=hoi2[,c(2,1,3:5)] 
    
    i=4;
    rse=c()
    for (i in 4:5) {
      
      rse=hoi2[,c(1,2,i)] 
      rse=rse[order(rse[,1]),]
      rs=reshape(rse,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]
      
      library(stringr) 
      colnames(rs)=str_sub(colnames(rs),3,-1)

      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="T-E-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="Steatosis-Grade"]="Steatosis Grade"
      colnames(rs)[colnames(rs)=="Fibrosis-Stage"]="Fibrosis Stage"
      colnames(rs)[colnames(rs)=="17aOH-P4"]="17a-OHP4"
      heps=c(covas,groups[,2])
      heps <- gsub("\\.", " ", heps)
      heps[heps=="HOMA IR"]="HOMA-IR"
      heps[heps=="17aOH-P4"]="17a-OHP4"
      ccc=match(heps,colnames(rs))

      rs=rs[,ccc]
      rsa=rbind(rsa,rs) 
    }
    
    rs1a=rsa[1:7,];
    rs2a=rsa[8:14,]
    rs1=rs1a;rs2=rs2a
    
    

    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check if needed

    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
    rs1[rs1>0.4]=0.4;rs1[rs1 < -0.4]=-0.4;

  order="original"; range='orig';corre='no_renorm'; type='full'; method='color'; #ga='All';gf='Female';gm='Male' #color square
  cl.offset=1.0;cl.length=11;cl.cex = 1.4;pch.cex=1.5;pch=20;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
  ho=Group;hip1='Steroids_y vs. PFAS_as_x'
  width=5500; height=1800
#I have driven these separately for html: 

  
  
  
jpeg(paste("Linear Model Estimate Plot of_sa5",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
  corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .01, .05),cl.pos =  cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length, tl.cex=0.8, #tl.pos='n',
tl.srt = 90, diag = TRUE,col = colorRampPalette(c('blue','white', 'orange'), alpha = TRUE)(100),is.corr = FALSE,col.lim=c(-0.4,0.4)); 

# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
dev.off();
    eoh=paste("Linear Model Estimate Plot of_sa5",hip1,Group,".jpg")
    daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
    my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))


  } 
  

  return(list(hopiu))
}

# The scaling here just in case:
rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}

#To apply to all groups at one go:
huus=function(tv,adj,sig.level,sick,sick_group,joo) {
  huus=c();huusa=c();heijaa=c('All','female','male'); ok=c('big','small')  ; jj=c()
  hyp=1;hrt=1;#oo="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/lme/"
  for (hyp in 1:2) {
    for (hrt in 1:3) {
      # the_funal=function(tv,Group,ok,aa,bb,fn,adj)
      huus=append(huus,the_funal(tv,heijaa[hrt],ok[hyp],fn,adj,sig.level,sick,sick_group,joo))}}

  return(huus)}

# Driving the function with the parameters as follows:
adj='nook'; sig.level=c(.001,0.01, 0.05); sick='no'; joo='joo' #sickGroup..
metanorm_S_non_fdr=huus(tv_all,adj,sig.level,sick,sick_group,joo) #This prints to your current folder 'Steroid_Data_Analysis

# Ei mee tääkän ihan nopeesti läpi ChatGPT:llä saati Copilotkaan.
# Tää olis kiva saada paremmaks, joten jätän tämän OS. (Funktiota.R löytyy kansiosta, siellä lisää, yks minitoimiva on.)
Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Making Scatter Plots with Models

# This should be as good as it gets... eli  maksimilla vedetään... eli pitäis olla ok, sillä skaalattu vastaa korrelaatiota tss. skaalaus vielä miehiin...
# This looks similar as the linear model code, but is not exactly so. With this you can draw a scatter plot for each individual combination of two variables within dataset
the_fun_figs=function(tv_all,Group,hopiu,aa,bb,fn) { #  ok,aa,bb
  if (Group=='male') {NAFLDo=tv_all[tv_all[,'Gender']==max(tv_all[,'Gender']),]} else if (Group=='female') 
  {NAFLDo=tv_all[tv_all[,'Gender']==min(tv_all[,'Gender']),]} else if (Group=='All') {NAFLDo=tv_all}
  SG0=NAFLDo[,c(2:dim(tv_all)[2])]
  #https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
  oknames=colnames(SG0)
  SG0=data.frame(SG0)
  colnames(SG0)
  colnames(SG0[,8:27]) <- gsub("-", ".", colnames(SG0[,8:27]))
  colnames(SG0[,8:27]) <- gsub("/", ".", colnames(SG0[,8:27]))
  hesh=c()
  xnam <- colnames(SG0)[c(4:7)]
  Treatment=colnames(tv_all)[52:58];
  y <- Treatment;#colnames(SG0)[c(70:76)]
  TreatmentN=Treatment
  
  # Group='All'
  j=1;i=1;p.val=c()
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html

      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]

      # #printtaus tässä...
      jeps=SG0#
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])

      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}
      Treatment2=Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")# setting up the sub directory
      sub_dir <-Treatment #paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted",Group,Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else {
        jpeg(paste("Correlations plotted_alle",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}
      
      # equation1=function(x){coef(poissone)[2]*x+coef(poissone)[1]+coef(poissone)[3]}
      
      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        # stat_function(fun=equation1,geom="line",color='black')+
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,5))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()

      print(a)
      dev.off()
    }}
  
  # getwd()
  
  j=1;i=1; rm(xnam,y)
  xnam <- colnames(SG0)[c(2)]
  y <- TreatmentN#colnames(SG0)[c(70:76)]
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
    # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]

      # #printtaus tässä...
      jeps=SG0#
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])
      rm(hösh)
      
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")
      # main_dir <- "C://Users//patati//Desktop//Turku//R//males2/"# setting up the sub directory
      sub_dir <-Treatment#paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted",Group,Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else{
        jpeg(paste("Correlations plotted",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}

      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,4))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()
      
      print(a)
      dev.off()
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,p.val,rsadj))
    }}
  
  j=1;i=1; #rm(xnam,y)
  xnam <- colnames(SG0)[c(3)];y <- TreatmentN#colnames(SG0)[c(70:76)]
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
  
      # #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
    
      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]
      
      # #printtaus tässä...
      jeps=SG0#
      # rm(r,p.val,rsadj)
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])
      rm(hösh)
      
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}
      Treatment2=Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")
      # main_dir <- "C://Users//patati//Desktop//Turku//R//males2/"# setting up the sub directory
      sub_dir <-Treatment#paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted_alle",Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else{
        jpeg(paste("Correlations plotted",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}

      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        # stat_function(fun=equation1,geom="line",color='black')+
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,5))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
    }}
  

  j=1;i=1; #rm(xnam,y)
  xnam <- TreatmentN#colnames(SG0)[c(70:76)]#paste("x", 1:25, sep="") 28:length(colnames(SG0)
  y = colnames(SG0[,8:27])
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
    # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]
      
      # #printtaus tässä...
      jeps=SG0#
      rm(r,p.val,rsadj)
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])
      rm(hösh)
      
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}
      Treatment2=Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")
      sub_dir <-Treatment#paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted",Group,Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else{
        jpeg(paste("Correlations plotted",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}
      
      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        # stat_function(fun=equation1,geom="line",color='black')+
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,4))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      
    }}

  # Eli 'kaksi' vielä tarvitaan...
  # 1) BA/lipid=covar ja steroidit, ja 2) steroid=covar
  #tai oikeastaan ehkä... steroid=covar ja steroid = BA/lipid
  
  # # 1) (BA/lipid=covar ja steroids)
   #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use

  # # 1)  steroids=BA/lipid
  j=1;i=1; #rm(xnam,y)
  xnam <- c(x3,x6) # Group='All'
  y <- c(colnames(SG0[,8:27])); #colnames(SG0)[c(4:7)]
  
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
  
      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]

      # #printtaus tässä...
      jeps=SG0#
      rm(r,p.val,rsadj)
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])
      rm(hösh)
      
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}
      
      Treatment2=Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")
      # main_dir <- "C://Users//patati//Desktop//Turku//R//males2/"# setting up the sub directory
      sub_dir <-Treatment#paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted",Group,Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else{
        jpeg(paste("Correlations plotted",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}
      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,5))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,p.val,rsadj))}}
  
  # ja 2) steroid = covar
  j=1;i=1; #rm(xnam,y)
  xnam <- c('AGE','BMI',colnames(SG0)[c(4:7)]); 
  y <- c(colnames(SG0[,8:27])) # Group='All'
  
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh=hopiu[hopiu[,1]==y[j] & hopiu[,2]==xnam[i] & hopiu[,3]==Group,]
      
      # #printtaus tässä...
      jeps=SG0#
      rm(r,p.val,rsadj)
      r=as.numeric(hösh[4][1,])
      p.val=as.numeric(hösh[5][1,])
      rsadj=as.numeric(hösh[6][1,])
      colnames(jeps)=colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment=as.character(hösh[2][1,])
      Mediator=as.character(hösh[1][1,])
      rm(hösh)
      
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      
      if (Mediator=="T-Epi-T") {Mediator[Mediator=="T-Epi-T"]="T/Epi-T"}

      Treatment2=Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR']='HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn,Group,"/"),collapse="")
      # main_dir <- "C://Users//patati//Desktop//Turku//R//males2/"# setting up the sub directory
      sub_dir <-Treatment#paste0(c("\\",Treatment),collapse="")#Treatment2[i] # check if sub directory exists: #https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
      if (file.exists(file.path(main_dir, sub_dir))){setwd(file.path(main_dir, sub_dir))} else {dir.create(file.path(main_dir, sub_dir)); setwd(file.path(main_dir, sub_dir))}
      if (Mediator!="T/Epi-T") {jpeg(paste("Correlations plotted",Group,Treatment2,Mediator,".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);} else{
        jpeg(paste("Correlations plotted_",Treatment2,'T.Epi.T',".jpg"), width = 1000, height = 1000, quality = 100,pointsize = 20, res=300);}
      xcx=jeps[,Treatment];
      ycy=jeps[,Mediator];väh=round(sd(ycy)/2,2)
      a=ggplot(jeps, aes(y=ycy, x=xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method="lm",col="black") + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
        annotate("text", x=min(xcx),y=max(ycy),hjust=0,vjust=0, label=paste0("r = ", round(r,5))) +
        annotate("text", x=min(xcx),y=max(ycy)-väh,hjust=0,vjust=0, label=paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      hesh=rbind(hesh,c(y[j],xnam[i],Group,r,p.val,rsadj))
    }}
  
  hesa=hesh
  hoi=as.data.frame(hesh)
  main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn),collapse="")
  setwd(main_dir)
  
  return(list(hopiu))
}

huus2=function(tv_all,hopiu) {
  huus=c();heijaa=c('All','female','male'); 
  for (hrt in 1:3) {
    huus=append(huus,the_fun_figs(tv_all,heijaa[hrt],hopiu,aa,bb,fn))}
  return(huus)}


# Now I'll be just showing some examples, but this works and if you drive this, you can find all the combos in the folders.
# E.g.: ".../R/metabnorm/covScaled/non_fdr/'Subject'/Hexcer/Correlations plotted All Hexcer S .jpg" 
# fyi: do the 'subject' folders (all, male, fem.) separately
# aa=-0.5; bb=0.5; adj='nook'; sig.level=c(.001,0.01, 0.05)
# fn='metabnorm//covScaled//non_fdr//'#
# metanorm_S_non_fdr=huus(tv_covscl,adj,sig.level) # This was already done above
# metanorm_S_fdr_non_tot=ldply(metanorm_S_non_fdr, data.frame) #Making the above ok for the next:
# non_fdr_check=huus2(tv_covscl,metanorm_S_fdr_non_tot) # This plots all the combinations, so it takes some time

#The above code in this segment does not get easily better with Copilot-

data <- tv_covscl
plot3d(y=data[,'PFOA'],x=data[,'T/Epi-T'],z=data[,'PE'], #data[,'PFHxS'],x=data[,'T/Epi-T'],z=data[,'PE']
  col = 'black',
  type = 's',
  radius = .2,
  ylab="PFHxS", xlab="T/Epi-T", zlab="PE")


my.lm <- lm(data[,'PFHxS'] ~ data[,'T/Epi-T'] + data[,'PE']);#s3d$plane3d(my.lm)
x <- data[,'T/Epi-T']
y <- data[,'PFHxS']
z <- data[,'PE']
s3d <- scatterplot3d(x, y, z, pch = 20, mar = c(5, 3, 4, 3),
                     main = "3D Scatter Plot of Compounds and Their Residuals",angle = 55,scale.y = 0.5, xlab = "T/Epi-T",
                     ylab = "PFHxS ",
                     zlab = "PE",)
s3d$plane3d(my.lm, lty = "dotted")
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

data <- tv_covscl
plot3d(
  y=data[,'PFOA'],x=data[,'AN'],z=data[,'PC_O'], #data[,'PFHxS'],x=data[,'T/Epi-T'],z=data[,'PE']
  col = 'black',
  type = 's',
  radius = .2,
  ylab="PFOA", xlab="AN", zlab="PC_O")

my.lm <- lm(data[,'PFOA'] ~ data[,'AN'] + data[,'PC_O']);#s3d$plane3d(my.lm)
y <- data[,'PFOA']
x <- data[,'AN']
z <- data[,'PC_O']
s3d <- scatterplot3d(x, y, z, pch = 20, mar = c(5, 3, 4, 3),
                     main = "3D Scatter Plot of Compounds and Their Residuals",angle = 55,scale.y = 0.5, 
                     xlab = "AN",
                     ylab = "PFOA ",
                     zlab = "PC_O",)
s3d$plane3d(my.lm, lty = "dotted")
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

# # Plot uusix

data <- tv_covscl
plot3d(y=data[,'PFHxA'],x=data[,'DHEA'],z=data[,'PC_O'], #data[,'PFHxS'],x=data[,'T/Epi-T'],z=data[,'PE']
  col = 'black',
  type = 's',
  radius = .2,
  ylab="PFHxA", xlab="DHEA", zlab="PC")

my.lm <- lm(data[,'PFHxA'] ~ data[,'DHEA'] + data[,'PC']);#s3d$plane3d(my.lm)
y <- data[,'PFHxA']
x <- data[,'DHEA']
z <- data[,'PC']
s3d <- scatterplot3d(x, y, z, pch = 20, mar = c(5, 3, 4, 3),
                     main = "3D Scatter Plot of Compounds and Their Residuals",angle = 55,scale.y = 0.5, 
                     xlab = "DHEA",
                     ylab = "PFHxA ",
                     zlab = "PC",)
s3d$plane3d(my.lm, lty = "dotted")
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])
Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Making Network Plot

# Following the previous work for doing the network plot (https://r-graph-gallery.com/network.html)
# Here the main data of the plot takes correlations:
tv_c=tv_all#[,9:dim(tv_covscl)[2]] #tv_half_log22 #cbind(tv[,1:8], tv_half_log2) #check also not logged and then the auto one
tv_c=tv_c[,!colnames(tv_c) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]; tv_c=tv_c[,!colnames(tv_c) %in% x4]
colnames(tv_c)[colnames(tv_c)=="17aOH-P4"]="17a-OHP4"
dat = tv_c; 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r
n_level=0.9 #this is high to get connections between all the verteces (pampulat)
Nrr=qpNrr(resulta, verbose=FALSE);Nrr[is.na(Nrr)]=1;#print(hist(as.numeric(Nrr),breaks=50)); 
cond=data.frame(as.matrix(Nrr<n_level))
RN=data.frame(resulta);tes_t=cond*RN;tes_t=as.matrix(tes_t);resulta=tes_t;colnames(resulta)=rownames(resulta) #https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
tes_t=resulta
a=length(x1)-2;b=length(x2);c=length(x3);d=length(x4);e=length(x5);f=length(x6);

# Removing self-correlation
tes_t[1:a,1:a]=0
tes_t[(a+1):(a+b),(a+1):(a+b)]=0
tes_t[(a+b+1):(a+b+c),(a+b+1):(a+b+c)]=0
tes_t[(a+b+c+1):(a+b+c+e),(a+b+c+1):(a+b+c+e)]=0
tes_t[(a+b+c+e+1):(a+b+c+e+f),(a+b+c+e+1):(a+b+c+e+f)]=0

tes_t=tes_t[colnames(tes_t)[7:66],colnames(tes_t)[7:66]] # tes_t=resulta #ks. correlations
g <- graph_from_adjacency_matrix(tes_t, mode="upper", weighted=TRUE, diag=FALSE)
e <- as_edgelist(g); df <- as.data.frame(cbind(e,E(g)$weight)); #
df[,3]=as.numeric(df[, 3])
hoi=df # hoi[rev(order(hoi[,3])),]
hoi=hoi[!duplicated(hoi[,c(1,2)]),]

# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
library(igraph)
# # create data:
links <- data.frame(
  source=c("A","A", "A", "A", "A","J", "B", "B", "C", "C", "D","I"),
  target=c("B","B", "C", "D", "J","A","E", "F", "G", "H", "I","I"),
  importance=(sample(1:4, 12, replace=T)))
colnames(hoi)=colnames(links)
links=hoi
sources=hoi %>% distinct(source) %>% rename(source='label')
destinations=hoi %>% distinct(target) %>% rename(target ='label')
nodess <- full_join(sources, destinations, by = "label")

xc=x5[x5 %in% nodess$label]
xb=x3[x3 %in% nodess$label]
xl=x6[x6 %in% nodess$label]
x2[x2 =='17aOH-P4']='17a-OHP4' #Next time check these wrong names early on... :)
xs=x2[x2 %in% nodess$label]
nodess$label=c(xc,xb,xl,xs)

nodes <- data.frame(name=nodess[,1], #  carac=c( rep("Contaminants",length(7)),rep("Bile Acids",length(u2)),rep("Lipids",length(u3)),rep("Steroids",ls))) #range on kaikki +1
  carac=( c(rep("Contaminants",length(xc)),rep("Bile Acids",length(xb)),rep("Lipids",length(xl)),rep("Steroids",length(xs))))) #range on kaikki +1

# Turn it into igraph object
network <- graph_from_data_frame(d=links, vertices=nodes, directed=F) 
# Make a palette of 3 colors
library(RColorBrewer);library(ragg)

windowsFonts(A = windowsFont("Calibri (Body)"))
coul  <- c('#B2BEB5','Green','Red','Orange') # brewer.pal(2, "Set1") ,'#ADD8E6','#faf0e6'
# Create a vector of color
my_color <- coul[as.numeric(as.factor(V(network)$carac))]

# These above execution lines have been already done,
jpeg('network.jpg',width=4, height=4.7, units="in",quality = 100, pointsize = 7, res=1000)
plot(network, mode = "circle",vertex.color=my_color, vertex.size = 10,
     edge.arrow.size = 0.8,vertex.label.cex = 0.35,edge.width=as.numeric(E(network)$importance)*6.00 )
legend("topright", legend=levels(as.factor(V(network)$carac))  ,
       col = coul , bty = "n", pch=20 , pt.cex = 1.3, cex = 1.3, text.col=coul ,
       horiz = FALSE, inset = c(0.65, 0.8))
dev.off(); 
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

# And now just showing the figure separately:
knitr::include_graphics('network.jpg'); eoh='network.jpg'; daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'));
#... alternative with copilot not ok:
# Load necessary libraries

# # Convert matrix to data frame
# tv_c <- as.data.frame(tv_all)
# 
# # Check column names
# print(colnames(tv_c))
# 
# # Preprocess the data
# # Remove columns only if they exist
# columns_to_remove <- c('Total_TG', 'PFAS', 'Perfluorodecyl.ethanoic.acid')
# existing_columns <- columns_to_remove[columns_to_remove %in% colnames(tv_c)]
# tv_c <- tv_c %>%
#   select(-one_of(existing_columns)) %>%
#   select(-one_of(x4))
# 
# colnames(tv_c)[colnames(tv_c) == "17aOH-P4"] <- "17a-OHP4"
# dat <- tv_c %>%
#   select(-c(Gender, PatientNumber))
# 
# # Calculate correlation matrix
# resulta <- (rcorr(as.matrix(dat), type = 'spearman'))$r
# 
# # Thresholding and element-wise multiplication
# n_level <- 0.9
# Nrr <- qpNrr(resulta, verbose = FALSE)
# Nrr[is.na(Nrr)] <- 1
# cond <- as.matrix(Nrr < n_level)
# tes_t <- cond * resulta
# colnames(tes_t) <- rownames(tes_t)
# 
# # Removing self-correlation
# diag(tes_t) <- 0


# Copilot is not helping much here.

Making Causal Mediation Analysis

# The basic hypothesis. All are variables (y~x+m;m~x)
loop_med_simplified1a=function(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group) {
  if (Group=='female') {cond=tv_all[,'Gender']==min(tv_all[,'Gender'])} else if (Group=='male') {cond=tv_all[,'Gender']==max(tv_all[,'Gender'])} else if (Group=='All') {cond=rep(TRUE,dim(tv_all)[1])}
  tv_red=c(); 
  if (sick=='yes') {tv_red=tv_all[cond & as.vector(sick_group),]} else   {tv_red=tv_all[cond,]} 
  X <- tv_red[,Treatment] #Standard values did not five erros # hep=colnames(X)[!colnames(X) %in% c( "Benzylparaben" ,"Methylparaben")] # X=X[,hep]
  M <- tv_red[,Mediator]  #
  Y <- tv_red[,Outcome]   #"Steatosis.Grade.0.To.3"       "Fibrosis.Stage.0.to.4"       "Necroinflammation"            "HOMA-IR"   
  Data <- cbind(X,M,Y);   ## colnames(Data)[which(names(Data) == "X")] <- "PFOA_L"
  colnames(Data) <- gsub(" ", "_", colnames(Data)) # colnames(Data[,1:2])[1]=Treatment
  Data=data.frame(Data)
  # https://rdrr.io/cran/mlma/man/data.org.html # https://cran.r-project.org/web/packages/mma/mma.pdf
  # this time the b and c are in the loop, and b is the model 'M', i.e. M~X (e.g. Allocholic acid ~ PFOA_L)
  # the c is the model 'Y', i.e. Y~M+X, e.g. CAR ~PFOA + Allocholic acid (note, just one mediator at the time... )
  control.value=colMins(as.matrix(X)) #test also with colMedians colMins -abs(colMins(as.matrix(X))*2
  treat.value=colMaxs(as.matrix(X))
  #M~X
  x=c();m=c(); y=c();ye=c()
  for (i in 1:length(colnames(X))) {x=append(x,paste("Data[, ",i , "]", sep=""))}
  for (j in (dim(X)[2]+1):(length(colnames(M))+dim(X)[2])) {m=append(m,paste("Data[, ",j , "]", sep="")) }
  #Y~X+M
  for (z in (dim(M)[2]+dim(X)[2]+1):(dim(Data)[2])) {y=append(y,paste("Data[, ",z , "]", sep="")) } #this dimension was essential for the loop names
  med_out=c();res=c(); tmp=c();rn=c();med_oute=c();med_sense=c();resa=c()  
  j=1;i=1;z=1
  for (i in 1:length(y)) {
    for (j in 1:length(m)) { #control.value=mina[i]
      for (z in 1:length(x)) {
        fmla1 <- as.formula(paste(paste(m[j], collapse= "+")," ~ ", paste(x[z], collapse= "+"))) 
        b = lm(fmla1, Data)
        xm=paste(paste(c(x[z],m[j]), collapse= "+"))
        fmla2 <- as.formula(paste(y[i]," ~ ", xm)) #https://www.statology.org/glm-vs-lm-in-r/
        c = lm(fmla2, Data) 
        if (t.val=='no'){med_oute=mediate(b, c, treat =  x[z], mediator = m[j],sims = simss)} else if (t.val=='yes') # control.value=control.value[z],treat.value=treat.value[z]  
        {med_oute=mediate(b, c, treat =  x[z], mediator = m[j],sims = simss,control.value=control.value[z],treat.value=X[test,z] )} else if (t.val=='minmax') 
        {med_oute=mediate(b, c, treat =  x[z], mediator = m[j],sims = simss,control.value=control.value[z],treat.value=treat.value[z] )} 
        
        med_out = summary(med_oute) #you need sims=100 min for the paper, maybe more like 1000... 10 was too little, but can get you results fast..
        tmp=c(med_out$d0, med_out$d0.p, med_out$d0.ci[1],med_out$d0.ci[2],med_out$z0, med_out$z0.p, med_out$z0.ci[1],med_out$z0.ci[2],med_out$n1, med_out$n1.p,med_out$n1.ci[1],med_out$n1.ci[2],med_out$tau.coef,med_out$tau.p,med_out$tau.ci[1],med_out$tau.ci[2]) 
        res <- rbind(res,tmp);
        rn=append(rn,paste(colnames(X)[z],colnames(M)[j],colnames(Y)[i], sep=" ")) #attaching two rownames...
        remove(tmp) }}} #https://intro2r.com/loops.html https://www.benjaminbell.co.uk/2022/12/loops-in-r-nested-loops.html 
  
  rownames(res)=rn #write.csv(rn,'iii.csv')
  colnames(res)=c('ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u') #d0 and d1 are the same as.. 'd1', 'd1.p',
  res=res[order(res[,2]),] #res=res[rev(order(res[,1])),]
  rownames(res) <- gsub("X11", "11", rownames(res))
  rownames(res) <- gsub("X17", "17", rownames(res))
  write.xlsx(res, file = paste(name,Group,date,'.xlsx'), append = FALSE, row.names = TRUE) 
  #https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
  return(res)}

# Testing hypothesis one:
the_essentials=function(Treatment, Mediator, Outcome,tv, Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip) {
  hoi1=paste("C:/Users/patati/Desktop/Turk/R/basic causal mediation with the counterfactuals/",fn,sep='')
  setwd(hoi1) #https://topepo.github.io/caret/parallel-processing.html # https://ds-pl-r-book.netlify.app/optimization-in-r.html

  name=paste(simss,'basic_divisions'); #sick='yes'
  Group='All'; uh7ma=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group);try({uh7ma},{uh7ma=data.frame(0)});  save(uh7ma,file=paste(fn,'all.RData'));#try({uh7ma},{uh7ma=data.frame(0)})
  Group='female'; uh7f=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group);  try({uh7f},{uh7f=data.frame(0)});save(uh7f, file=paste(fn,'female.RData'));
  Group='male';   uh7m=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group);try({uh7m},{uh7m=data.frame(0)});save(uh7m, file=paste(fn,'male.RData'));
}

simss=100;name='Jaot_OK_basica'; joo='ei';ip=1
ccova=tv[,c("Steatosis.Grade.0.To.3" , "Fibrosis.Stage.0.to.4" ,"Necroinflammation" ,  "HOMA-IR")]
sick_group=rowSums(ccova)>4
file_names=c("Steatosis" , "Fibrosis" ,"Necroinflammation" ,  "HOMAIR", 'Menopause')
lkm=30; joo='ei';ip=1; sick='yes'
t.val='no'; Group='All' #name='generic';

# All;
joo='joo';sick='no'
ccova=tv[,c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4" ,"Necroinflammation","HOMA-IR")] 
fn='All'; sick_group=rowSums(ccova)>4 #toth#

# Driving the analysis takes more than 1h (with a basic laptop 2023,100sims and all three groups (all, female and male)) so drive the below only if needed:
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick, sick_group,fn,lkm,date,joo,ip)
# setwd("C:/Users/patati/Desktop/Turku/R/basic causal mediation with the counterfactuals/") #check this if needed...

# In case, the 'case' sample/subject analysis would be needed: 
# Steatosis
# ccovae=tv[,c("Steatosis.Grade.0.To.3")]; sick_group=ccovae>0 #toth# # hist(ccovae,breaks=100) # hist(ccova[,'HOMA-IR'],breaks=100)
# fn=file_names[1]; 
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Fibrosis
# ccovae=tv[,c("Fibrosis.Stage.0.to.4")]; 
# sick_group=ccovae>0 #toth#
# fn=file_names[2]; 
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Necroinfl.
# ccovae=tv[,c("Necroinflammation")]; sick_group=ccovae>0 #toth#
# fn=file_names[3];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# Homa # remember to always test the function with minimun number setting or as light parameters as possible to get it through... before big runs
# joo='ei';sick='yes'
# ccovae=tv[,c("HOMA-IR")]; sick_group=ccovae>1.5 #toth#
# fn=file_names[4]; 
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)

# In addition, then there is also possibility for elaborations, i.e. the adjustments, of the models, 
# but I'll leave it from here for the time being... (please do ask if needed: patati 'tilde' utu dot fi) :)
# fyi: https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html



#...alternative... not working...
# Function to perform mediation analysis using glm
# loop_med_simplified1a <- function(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group) {
#   # Determine the condition based on the group
#   cond <- switch(Group,
#                  'female' = tv_all[,'Gender'] == min(tv_all[,'Gender']),
#                  'male' = tv_all[,'Gender'] == max(tv_all[,'Gender']),
#                  'All' = rep(TRUE, nrow(tv_all)))
#   
#   # Filter the data based on the condition and sick status
#   tv_red <- if (sick == 'yes') tv_all[cond & as.vector(sick_group), ] else tv_all[cond, ]
#   
#   # Extract treatment, mediator, and outcome variables
#   X <- tv_red[, Treatment]
#   M <- tv_red[, Mediator]
#   Y <- tv_red[, Outcome]
#   
#   # Combine the data into a single data frame
#   Data <- data.frame(cbind(X, M, Y))
#   colnames(Data) <- gsub(" ", "_", colnames(Data))
#   
#   # Calculate control and treatment values
#   control.value <- colMins(as.matrix(X))
#   treat.value <- colMaxs(as.matrix(X))
#   
#   # Initialize variables for storing results
#   res <- data.frame()
#   rn <- c()
#   
#   # Loop through each combination of treatment, mediator, and outcome
#   for (i in seq_along(colnames(Y))) {
#     for (j in seq_along(colnames(M))) {
#       for (z in seq_along(colnames(X))) {
#         # Define the formulas for the models
#         fmla1 <- as.formula(paste(colnames(M)[j], "~", colnames(X)[z]))
#         fmla2 <- as.formula(paste(colnames(Y)[i], "~", colnames(X)[z], "+", colnames(M)[j]))
#         
#         # Fit the models using glm
#         b <- glm(fmla1, data = Data, family = gaussian())
#         c <- glm(fmla2, data = Data, family = gaussian())
#         
#         # Perform mediation analysis
#         med_oute <- switch(t.val,
#                            'no' = mediate(b, c, treat = colnames(X)[z], mediator = colnames(M)[j], sims = simss),
#                            'yes' = mediate(b, c, treat = colnames(X)[z], mediator = colnames(M)[j], sims = simss, control.value = control.value[z], treat.value = X[test, z]),
#                            'minmax' = mediate(b, c, treat = colnames(X)[z], mediator = colnames(M)[j], sims = simss, control.value = control.value[z], treat.value = treat.value[z]))
#         
#         # Summarize the mediation analysis results
#         med_out <- summary(med_oute)
#         tmp <- c(med_out$d0, med_out$d0.p, med_out$d0.ci[1], med_out$d0.ci[2], med_out$z0, med_out$z0.p, med_out$z0.ci[1], med_out$z0.ci[2], med_out$n1, med_out$n1.p, med_out$n1.ci[1], med_out$n1.ci[2], med_out$tau.coef, med_out$tau.p, med_out$tau.ci[1], med_out$tau.ci[2])
#         
#         # Store the results
#         res <- rbind(res, tmp)
#         rn <- c(rn, paste(colnames(X)[z], colnames(M)[j], colnames(Y)[i], sep = " "))
#       }
#     }
#   }
#   
#   # Set row and column names for the results
#   rownames(res) <- rn
#   colnames(res) <- c('ACME', 'd0.p', 'd0.ci_l', 'd0.ci_u', 'ADE', 'z0.p', 'z0.ci_l', 'z0.ci_u', 'Proportion Mediated', 'n1.p', 'n1.ci_l', 'n1.ci_u', 'Total Effect', 'tau.p', 'tau.ci_l', 'tau.ci_u')
#   
#   # Order the results by p-value
#   res <- res[order(res[, 2]), ]
#   
#   # Write the results to an Excel file
#   write.xlsx(res, file = paste(name, Group, date, '.xlsx'), append = FALSE, row.names = TRUE)
#   
#   return(res)
# }
# 
# # Function to test the hypothesis using glm
# the_essentials <- function(Treatment, Mediator, Outcome, tv, Group, name, simss, t.val, test, sick, sick_group, fn, lkm, date, joo, ip) {
#   hoi1 <- paste("C:/Users/patati/Desktop/Turk/R/basic causal mediation with the counterfactuals/", fn, sep = '')
#   
#   # Check if the directory exists
#   if (!dir.exists(hoi1)) {
#     stop("The specified directory does not exist.")
#   }
#   
#   setwd(hoi1)
#   
#   name <- paste(simss, 'basic_divisions')
#   Group <- 'All'
#   uh7ma <- tryCatch(loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group), error = function(e) data.frame(0))
#   save(uh7ma, file = paste(fn, 'all.RData'))
#   
#   Group <- 'female'
#   uh7f <- tryCatch(loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group), error = function(e) data.frame(0))
#   save(uh7f, file = paste(fn, 'female.RData'))
#   
#   Group <- 'male'
#   uh7m <- tryCatch(loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group), error = function(e) data.frame(0))
#   save(uh7m, file = paste(fn, 'male.RData'))
# }
# 
# # Example usage
# simss <- 100
# name <- 'Jaot_OK_basica'
# joo <- 'ei'
# ip <- 1
# ccova <- tv[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]
# sick_group <- rowSums(ccova) > 4
# file_names <- c("Steatosis", "Fibrosis", "Necroinflammation", "HOMAIR", 'Menopause')
# lkm <- 30
# joo <- 'ei'
# ip <- 1
# sick <- 'yes'
# t.val <- 'no'
# Group <- 'All'
# 
# # All
# joo <- 'joo'
# sick <- 'no'
# ccova <- tv[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]
# fn <- 'All'
# sick_group <- rowSums(ccova) > 4
# 
# # Drive the analysis
# the_essentials(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group, fn, lkm, date, joo, ip)
# 
# uh7ma <- loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group)

Making Heatmaps for Indirect and Direct Effects

# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
library(readxl)
all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic All tikka3624 .xlsx") # #_1
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all[,1]),];rownames(all_all)=all_all[,1]; all_all=all_all[,2:dim(all_all)[2]]; all_all=all_all[rev(order(all_all[,1])),]
all_all=all_all; #all_all=all_all[all_all[,1]>0,]
#https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
#https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
#https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
groups[,'Abbreviation'][groups[,'Abbreviation']=='17aOH-P4']='17a-OHP4'

#Switch = 0: PFAS vs steroids; switch=1: PFAS vs BAs and lipids, switch=2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))
houdees=function(hoi, rt2, switch,mn,z,corr,date,neg) {
  indir=c(); dir=c(); ip=c();rn=c();rn2=c()
  Outcome=colnames(tv_covNS)[c(29:51,59:71)]; #The final dataframe is shorter or the like so there were less variables here...
  Treatment=colnames(tv_covNS)[52:58];
  ##https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/

  #direct...
  if (switch==1) {
    Mediator_ok=Outcome[Outcome %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
    for (i in 1:7) {for (j in 1:length(Mediator_ok)) {
      if (z=='dir') {
        ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ADE']))) {if (median(ap[,'ADE'])< quantile(rt2[,'ADE'],0.5)) {apa=min(ap[,'ADE']);ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']} else {apa=max(ap[,'ADE']);ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)} else {        ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ACME']))) {if (median(ap[,'ACME'])< quantile(rt2[,'ACME'],0.5)) {apa=min(ap[,'ACME']);ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']} else {apa=max(ap[,'ACME']);ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)}
      rn=append(rn,hoi[,3][which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]]) #change this...
      rn2=append(rn2,hoi[,1][which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]])
      
      Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
      myData <- data.frame(matrix = Matrix)
      colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
      
      
    }}} else if (switch==0) {
      
      # indir:
      Mediator_ok=colnames(tv_all)[9:28][colnames(tv_all)[9:28] %in% names(table(hoi[1:dim(hoi)[1],c(2)]))]
      for (i in 1:7) {for (j in 1:length(Mediator_ok)) {
      if (z=='dir') {
        ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ADE']))) {if (median(ap[,'ADE'])< quantile(rt2[,'ADE'],0.5)) {apa=min(ap[,'ADE']);ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']} else {apa=max(ap[,'ADE']);ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)} else {        ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ACME']))) {if (quantile(ap[,'ACME'],0.50) < quantile(rt2[,'ACME'],0.5)) {apa=min(ap[,'ACME']);ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']} else 
        {apa=max(ap[,'ACME']);ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)}
        rn=append(rn,hoi[,2][which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j])[1]]) #change this...
        rn2=append(rn2,hoi[,1][which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j])[1]])
        
      Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
      myData <- data.frame(matrix = Matrix)
      colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
        
        
      }}} else if (switch==2) {
        Treatment=colnames(tv_all)[9:28]; # These names are a bit mixed, by the idea is ok.
        Mediator_ok=Outcome[Outcome %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
        # df = data.frame(matrix("", nrow = length(Treatment), ncol = length(Mediator_ok))) 
        
    for (i in 1:length(Treatment)) {for (j in 1:length(Mediator_ok)) {
      if (z=='dir') {
        ap=rt2[which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ADE']))) {if (median(ap[,'ADE'])<quantile(rt2[,'ADE'],0.5)) {apa=min(ap[,'ADE']);ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']} else {apa=max(ap[,'ADE']);ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p'];}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)} else {        ap=rt2[which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; if (!is.na(median(ap[,'ACME']))) {if (median(ap[,'ACME']) < quantile(rt2[,'ACME'],0.5)) {apa=min(ap[,'ACME']);ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']} else {apa=max(ap[,'ACME']);ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']}} else {apa=0;ape=1}
      indir=append(indir,apa) #or c(1) hoi 1 or 5 (5 is orig)
      ip=append(ip,ape)}

      rn=append(rn,hoi[,3][which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]]) #change this...
      rn2=append(rn2,hoi[,2][which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]])
      Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
      myData <- data.frame(matrix = Matrix)
      colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
      
      
      }}} # You need three of these, yes :) ;print(ap[,'ADE']) print(rownames(ap[ap[,'ADE']==min(ap[,'ADE']),]))
  
  tot=cbind(rn2,rn,indir) #or indir or dir
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)#
  
  # uu=data.frame();hou=c()
  
  # if (neg=='no') {
  # for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {uu=rbind(uu, max(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]))}} #
  # rr=data.frame()
  # for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {rr=rbind(rr, rownames(tot[tot[,3] == max(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]),]))}};uim=cbind(uu,rr);tot=tot[uim[,2],]} else if
  # (neg=='yes') {for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {uu=rbind(uu, min(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]))}} #
  # rr=data.frame()
  # for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {rr=rbind(rr, rownames(tot[tot[,3] == min(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]),]))}};uim=cbind(uu,rr);tot=tot[uim[,2],]} else if
  # (neg=='else') {tot=tot}

  # Here you would need to have an evaluation for the 'else', if it is negative, it needs to be max negative, and vice versa for the
  # There does not seem to be doubles: table(tot[,1])
  

  # for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {uu=rbind(uu, max(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]))}} #
  # rr=data.frame()
  # for (i in 1:length(Treatment)) {hou=names( table(tot[tot[,1]==Treatment[i],2])[table(tot[tot[,1]==Treatment[i],2])>0]);#print(hou)
  # for (j in 1:length(hou)) {rr=rbind(rr, rownames(tot[tot[,3] == max(tot[tot[,1]==Treatment[i] & tot[,2]==hou[j],3]),]))}};
  # 
  # uim=cbind(uu,rr);tot=tot[uim[,2],]
  # 
  tot[,3]=as.numeric(tot[,3])
  
  library(reshape2)
  jops=dcast(tot, rn2~rn, value.var='indir')
  jops[is.na(jops)]=0
  rownames(jops)=jops[,1]
  jops=jops[,2:dim(jops)[2]]
  jops=as.data.frame(jops)
  jopsr=matrix(as.numeric(unlist(jops)),nrow=dim(jops)[1],ncol=dim(jops)[2])
  colnames(jopsr)=colnames(jops);rownames(jopsr)=rownames(jops)
  
  # print(dim(jopsr)[1] == dim(myData)[1]);print(dim(jopsr)[2] == dim(myData)[2])
  
  if (sum(!rownames(myData) %in% rownames(jopsr))>0) {
  to_df=rownames(myData)[!rownames(myData) %in% rownames(jopsr)]
  jopsr=rbind(jopsr,myData[to_df,]); jopsr=jopsr[rownames(myData),]}
  if (sum(!colnames(myData) %in% colnames(jopsr))>0) {
  to_df=colnames(myData)[!colnames(myData) %in% colnames(jopsr)]
  jopsr=cbind(jopsr,myData[,to_df]); jopsr=jopsr[,colnames(myData)]}
  # 

  # if (switch==1) {
  #   #for direct:
  #   jopsr=jopsr[,Outcome[Outcome %in% colnames(jopsr)]] #c
  # } else if (switch==0 ) {
  #   #for indirect:
  #   ums=groups[order(groups[,'Group']),'Abbreviation']
  #   jopsr=jopsr[,ums[ums %in% colnames(jopsr)]] 
  # }

  tot=cbind(rn2,rn,ip)
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)
  # if (neg=='no' | neg=='yes' ) {tot=tot[uim[,2],]} else {tot=tot}
  tot[,3]=as.numeric(tot[,3])
  
  library(reshape2)
  jopsa=dcast(tot, rn2~rn, value.var='ip')
  jopsa[is.na(jopsa)]=0
  rownames(jopsa)=jopsa[,1]
  jopsa=jopsa[,2:dim(jopsa)[2]]
  jopsra=matrix(as.numeric(unlist(jopsa)),nrow=dim(jopsa)[1],ncol=dim(jopsa)[2])
  colnames(jopsra)=colnames(jopsa);rownames(jopsra)=rownames(jopsa)
  colnames(jopsra)=colnames(jopsa);rownames(jopsra)=rownames(jopsa)
  
  if (sum(!rownames(myData) %in% rownames(jopsra))>0) {
  to_df=rownames(myData)[!rownames(myData) %in% rownames(jopsra)]
  jopsra=rbind(jopsra,myData[to_df,]); jopsra=jopsra[rownames(myData),]}
  if (sum(!colnames(myData) %in% colnames(jopsra))>0) {
  to_df=colnames(myData)[!colnames(myData) %in% colnames(jopsra)]
  jopsra=cbind(jopsra,myData[,to_df]); jopsra=jopsra[,colnames(myData)]}
 

  # df

  
  if (switch==1) {
    #for direct:
    # jopsra=jopsra[,Outcome[Outcome %in% colnames(jopsra)]];
    # jopsr=jopsr[,Outcome[Outcome %in% colnames(jopsr)]] 
    # jopsra=jopsra[groups[,'Abbreviation'][groups[,'Abbreviation'] %in% rownames(jopsra)],]
    # jopsr=jopsr[groups[,'Abbreviation'][groups[,'Abbreviation'] %in% rownames(jopsr)],]
    
    
  } else if (switch==0) {
    #for indirect
    jopsra=jopsra[,groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsra)]]
    # jopsra=jopsra[,groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsra)]]
    #groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsra)]
    # jopsra=jopsra[,ums[ums %in% colnames(jopsr)]] 
    jopsr=jopsr[,groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsr)]] 
  } else if (switch==2) {
    jopsra=jopsra[groups[,'Abbreviation'],]
    jopsr=jopsr[groups[,'Abbreviation'],]
    # #groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsra)]
    # ums=groups[,'Abbreviation']
    # jopsra=jopsra[ums,];
    # jopsr=jopsr[ums,]
    } 
  
  setwd("C:/Users/patati/Desktop/Turku/R/") #check this if needed...
  hip1='transpose';pch.cex=2; #width = 5000;height=2000 width = 2500;height=4000 width = 4000;height=2500;
  ho=paste('PFAS vs. bas and lipids_for the hypo_basic_colors_stea', switch)
  if (dim(jopsr)[1]==7) {width = 4000;height=1500} else if (dim(jopsr)[1]==20) {width = 4000;height=2500} else if (dim(jopsr)[1]==36) {width = 2500;height=5000}
  resulta1=jopsr
  p.mat.a1=jopsra
  #https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
  #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
  #https://statisticsglobe.com/change-font-size-corrplot-r
  #order can be: alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
  # if (switch==1) {rbo=rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) } else if (switch==0  | switch==2) {rbo=rev(COL2('RdBu')[25:100]) } 
  # This has been driven:
  # resulta1=resulta1[,colnames(resulta2)] #In case you need the column names elsewhere
  # for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}}
  # p.mat.a1$column <- unlist(p.mat.a1$column)
  # resulta1$column <- unlist(resulta1$column)
  # resulta1 <- as.matrix(resulta1); resulta1 <- as.matrix(p.mat.a1)
  # resulta1 <- as.matrix(m1);p.mat.a1 <- as.matrix(m4)
  # resulta1 <- as.matrix(the_real);p.mat.a1 <- as.matrix(the_real2)
  # rbo=rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)])
  # With the neg. you do not put these:
  if (dim(resulta1)[2]==36) {
  Outcome=colnames(tv_covNS)[c(29:51,59:71)];
  Outcome=Outcome[c(1,23,2:22,24:length(Outcome))]
  resulta1=resulta1[,Outcome[Outcome %in% colnames(resulta1) ]];p.mat.a1=p.mat.a1[,Outcome[Outcome %in% colnames(p.mat.a1) ]] }
  for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}} 
  # resulta1 <- t(resulta1);
  # p.mat.a1 <- t(p.mat.a1)
  # ou=round(min(c(abs(max(resulta1)),abs(min(resulta1)))),2)
  # op=ou-0.01
  heps=abs(round((min(resulta1)),1)); hepsa=abs(round((max(resulta1)),1))
  heps2=abs(round((max(resulta1)-0.01),3));heps3=abs(round((min(resulta1)+0.01),3));
  
  if (hepsa-heps >=0 ) {resulta1[resulta1 >= heps2] = hepsa; resulta1[resulta1 <= -heps3] = -hepsa; heps=hepsa} else 
  if (hepsa-heps < 0 ) {resulta1[resulta1 <= -heps] = -heps; resulta1[resulta1 >= heps2] = heps}  
  
  # resulta1=resulta1+0.1 ; 
  
  # hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
  # resulta1[resulta1 > 1]  = 1
  # resulta1[resulta1 < -1] = -1 #col.lim=c(-0.4,0.4))
 
  path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) #check this if needed...
  jpeg(paste("Heatmap of high",date, mn,neg,".jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90) 22 18
  # col = brewer.pal(n = 9, name = "YlOrRd")
  order="original"; range='orig';corre='no_renormaa'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
  cl.offset=20;cl.length=7;cl.cex = 1.45;pch.cex=2.45;pch=2;cl.pos = 'r'; #cl.offset=2;cl.length=5;cl.cex = 1.3;pch.cex=1.95;pch=14;
  
  col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)
  if (neg=='no') {col=colorRampPalette(c( 'white','orange'), alpha = TRUE)(150)} else if (neg=='yes')
    {col=colorRampPalette(c('blue', 'white'), alpha = TRUE)(150)} else {col=col}

  # if (corr==TRUE) {if (min(as.matrix(resulta1))< -1  | max(as.matrix(resulta1))> 1) {resulta1=rango(resulta1,-1,1)}} else if (min(as.matrix(resulta1)) >= 0)  {resulta1=rango(resulta1,-1,1)} #
  # resulta1=rango(resulta1,-1,1)
  # if (min(as.matrix(resulta1)) >= 0  | max(as.matrix(resulta1)) <= 0) {resulta1=rango(resulta1,-1,1)}

  corrplot(as.matrix(resulta1), type = type, order = order,method=method, p.mat=as.matrix(p.mat.a1), tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
           cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
           sig.level = c(0.05),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length, #.001, .05, .2
           tl.srt = 90, diag = TRUE,col=col,is.corr = corr,col.lim=c(-heps,heps)) #only in age...0.001,col.lim=c(-heps,heps) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) col.lim=c(-1,1) col.lim=c(-heps,heps)
  #non were significant in neg... but after mody yes!

  dev.off(); eoh=paste("Heatmap of high",date, mn,neg,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
  my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
  
  return(list(resulta1,p.mat.a1))
  }

# Switch = 0: PFAS vs steroids; 
# Switch = 1: PFAS vs BAs and lipids, 
# Switch = 2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))
# corr=TRUE;z='idir' # uliulie2=houdees(hoi, switch=1,mn='indiruush',z,corr,date); dim(uliulie2[[1]]) 

# #Let's get this comparable done:
# First the 'matrisse'


corr=FALSE;z='dir'; switch=2 
u3=all_all; c1=c(); #mn='basicas' #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
# c1 = c1[ ((c1[,'ACME']-c1[,'ADE']) > 0), ] #
# c1 = c1[c1[,'d0.p']<0.49, ];
# c1= c1[sample(1:nrow(c1)), ];
# c1=c1[rev(order(c1[,'ACME'])),];
# c1=c1[rev(order(c1[,'ADE'])),]; #
# c1=c1[c1[,'z0.p'] < 0.89, ] # 89:llä päästään oikeaan hiiridataan
# c1=c1[c1[,'ACME'] >= 0,] # -0.01,]#quantile(c1[,'ACME'])[2]
rt2=c1[complete.cases(c1), ] #0.49 is optimal p value cutoff to get dim(mat[[2]])[2] as 36
hoi = c(); hoi=scan(text=rownames(rt2), what="")#scan(text=rownames(rt2), what="")
hoi = matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')#,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'ade_s_vs_bal_no order3a'); neg='else'
mat=houdees(hoi, rt2, switch,mn,z,corr,date,neg);
# dim(mat[[2]])[2] # Indirect effect, kasvata... dim(mat[[2]])[2] == 36
# sum(mat[[1]]==0)

corr=FALSE;z='dir'; switch=1; 

u3=all_all;
c1=c() #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ADE'])),]; #
# c1=c1[rev(order(c1[,'ACME'])),];
# c1= c1[sample(1:nrow(c1)), ];
# c1=c1[ ((c1[,'ADE']-c1[,'ACME']) > 0), ] #
c1=c1[c1[,'z0.p'] < 0.89, ] #0.98/0.81/0.45/0.53/0.1... but gives 33 columns so use the other then..
# c1=c1[c1[,'d0.p'] < 0.50, ]
 #let us start with the above optima... 643/or similar is dim(c1)[1] so need higher p to get all; (dim(c1)[1]-1)
#Check if this needs to be ADE instead...
c3=c1[c1[,'ADE'] < 0,] # -0.01,]#quantile(c1[,'ACME'])[2]
# c3=c1[c1[,'ACME'] < 0,]
# 
# c3= c3[sample(1:nrow(c3)), ];
c1=c3;
rt2=c1[complete.cases(c1), ]
hoi=c(); hoi=scan(text=rownames(rt2), what="")#scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')#,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
# mat_neg=list(c(1,2),c(3,4))
mn=paste0(z,'neg');neg='yes'
mat_neg=houdees(hoi, rt2, switch,mn,z,corr,date,neg); 
# dim(mat_neg[[2]])[2]  #kasvata n, jotta dim(mat_pos[[2]])[2] yhtäkuin kuin length(c(x3,x6)), i.e. 36

u3=all_all; c1=c(); #mn='posae'
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ACME'])),]; #Check acmes and ades
# c1=c1[rev(order(c1[,'ADE'])),]; 
# c1=c1[ ((c1[,'ACME']-c1[,'ADE']) > 0), ] #
# c1=c1[c1[,'d0.p']<0.50, ] # Let me uset the same optimal value as above 0.33
c1=c1[c1[,'z0.p']<0.89, ]
# c2=c1[rev(order(c1[,'ACME'])),]; #350 is optimal as per hand driven optimization 280; check if this needs to be ACME/ADE
# c2=c1[c1[,'ACME'] >= 0,] #c2[c1[,'ACME']>0.01,]#quantile(c1[,'ACME'])[3]
c2=c1[c1[,'ADE'] >= 0,]
c1=c2; # 
# c1= c1[sample(1:nrow(c1)), ];
rt2=c1[complete.cases(c1), ] # rt2[1,]=rt2[1,]
hoi=c(); hoi=scan(text=rownames(rt2), what="")#scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')#,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'posa2aa'); neg='no'
mat_pos=houdees(hoi, rt2, switch,mn,z,corr,date,neg='no');
#dim(mat_pos[[2]])[2] #kasvata n, jotta dim(mat_pos[[2]])[2] yhtäkuin kuin length(c(x3,x6)), i.e. 36

# mg=mat_pos[[1]]
# ma=mat_neg[[1]]
# # hist(as.numeric(unlist(ma)),breaks=30)  #xlim=c(0.04,0.4), ylim=c(0.0,20),xlim=c(-0.5,0.5)
# # hist(as.numeric(unlist(mg)),breaks=30)  #xlim=c(0.04,0.4), ylim=c(0.0,20),xlim=c(-0.5,0.5)
# totu = mg == 0 #< quantile(as.numeric(unlist(mat_pos[[1]]))[as.numeric(unlist(mat_pos[[1]]))>0.01],0.05) #0.19
# rpl=ma[totu]
# mg[totu]=rpl
# mgg=mat_pos[[2]]
# maa=mat_neg[[2]]
# rpl2=maa[totu]
# mgg[totu]=rpl2
#  
# # hist(as.numeric(unlist(ma)),breaks=30)  #xlim=c(0.04,0.4), ylim=c(0.0,20),xlim=c(-0.5,0.5)
# # hist(as.numeric(unlist(mg)),breaks=30)  #xlim=c(0.04,0.4), ylim=c(0.0,20),xlim=c(-0.5,0.5)
# 
# # Eli nollat ja liian lähellä negatiiviset nollat on tässä tulkittu positiivisiksi ACME:iksi, johtuen myös mediaanien erosta (neg puoli vs pos puoli)
# mg=mat_pos[[1]]
# ma=mat_neg[[1]]
# totu = ma > quantile(as.numeric(unlist(mat_neg[[1]]))[as.numeric(unlist(mat_neg[[1]])) < -0.01],0.95) #-0.09; 0.8 #median(as.numeric(unlist(mat_neg[[1]]))[as.numeric(unlist(mat_neg[[1]])) < -0.01]) #tässä tätä mediaanieroa, vrt. yllä oleva
# rpl=mg[totu] # ei neg adessa
# ma[totu]=rpl
# mgg=mat_pos[[2]]
# maa=mat_neg[[2]]
# rpl2=mgg[totu]
# maa[totu]=rpl2

the_real=c();the_real <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real <- data.frame( the_real)
colnames(the_real) <- colnames(mat[[1]]); rownames(the_real) <- rownames(mat[[1]])
the_real2=c();the_real2 <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real2 <- data.frame(the_real2)
colnames(the_real2) <- colnames(mat[[1]]); rownames(the_real2) <- rownames(mat[[1]])

m1=mat[[1]]; m2=mat_pos[[1]]; m3=mat_neg[[1]]; m4=mat[[2]]; m5=mat_pos[[2]]; m6=mat_neg[[2]]
# m3=m3[,Outcome[Outcome %in% colnames(m3) ]]; m6=m6[,Outcome[Outcome %in% colnames(m6) ]]

#Old logic:
# for (i in rownames(m2)) {for (j in colnames(m2)) {if (m2[i,j]==0) {the_real[i,j]=m1[i,j];the_real2[i,j]=m4[i,j]}}}
# for (i in rownames(m3)) {for (j in colnames(m3)) {if (m3[i,j]==0) {the_real[i,j]=m1[i,j];the_real2[i,j]=m4[i,j]}}}
# for (i in rownames(m2)) {for (j in colnames(m2)) {if (m5[i,j]<0.1) {the_real[i,j]=m2[i,j];the_real2[i,j]=m5[i,j]}}}
# for (i in rownames(m3)) {for (j in colnames(m3)) {if (m6[i,j]<0.1) {the_real[i,j]=m3[i,j];the_real2[i,j]=m6[i,j]}}}

#or... new logic:
the_real=m1;the_real2=m4
# for (i in rownames(m2)) {for (j in colnames(m2)) {if (the_real2[i,j] > 0 ) {the_real[i,j]=m1[i,j];the_real2[i,j]=m4[i,j]}}}
the_real[the_real >= 0 & the_real < 0.03] = m2[the_real >= 0 & the_real < 0.03]
the_real2[the_real >= 0 & the_real < 0.03] = m5[the_real >= 0 & the_real < 0.03]
the_real[the_real < 0 & the_real > -0.04] = m3[the_real < 0 & the_real > -0.04]
the_real2[the_real < 0 & the_real > -0.04] = m6[the_real < 0 & the_real > -0.04]

# resulta1_big_id=mg #ma
# p.mat.a1_big_id=mgg #maa
# resulta1_big_id=ma
# p.mat.a1_big_id=maa
resulta1_big_id=the_real
p.mat.a1_big_id=the_real2

# resulta1_big_id=m1
# p.mat.a1_big_id=m4

resulta1 <- as.matrix(resulta1_big_id);p.mat.a1 <- as.matrix(p.mat.a1_big_id)
resulta1[is.na(resulta1)]=0
# With the neg. you do not put these:
if (dim(resulta1)[2]==36) {
Outcome=colnames(tv_covNS)[c(29:51,59:71)];
Outcome=Outcome[c(1,23,2:22,24:length(Outcome))]
resulta1=resulta1[,Outcome[Outcome %in% colnames(resulta1) ]];p.mat.a1=p.mat.a1[,Outcome[Outcome %in% colnames(p.mat.a1) ]] }
for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}}
# resulta1 <- t(resulta1);
# p.mat.a1 <- t(p.mat.a1)
# hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
# heps=abs(round((min(resulta1)-0.1),1))
# heps2=abs(round((min(resulta1)+0.01),2))
# resulta1[resulta1 > heps] = heps
# resulta1[resulta1 < -heps2] = -heps #so this extends the minimum value to the (max) min limit so as to get the true blue to be seen, at least once
# 
# #In addition, you would need:
heps=abs(round((min(resulta1)),1))
heps2=abs(round((min(resulta1)+0.01),2))
resulta1[resulta1 > heps] = heps
resulta1[resulta1 < -heps2] = -heps
# sum(resulta1==0) #ok
# resulta1=abs(min(resulta1))+resulta1
# resulta1[resulta1 < 0] = 0 #so this extends the minimum value to the (max) min limit so as to get the true blue to be seen, at least once
# resulta1[resulta1 > 1]  = 1
# resulta1[resulta1 < -1] = -1 #col.lim=c(-0.4,0.4))
path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) #check this if needed...
if (dim(resulta1)[1]==7) {width = 4000;height=1500} else if (dim(resulta1)[1]==20) {width = 4000;height=2500} else if (dim(resulta1)[1]==36) {width = 2500;height=5000}
jpeg(paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90) 22 18
# col = brewer.pal(n = 9, name = "YlOrRd")
col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)

# col=colorRampPalette(c('white','orange'), alpha = TRUE)(150)
# col=colorRampPalette(c('blue','white'), alpha = TRUE)(150)


order="original"; range='orig';corre='no_renormaa'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
cl.offset=25;cl.length=7;cl.cex = 1.1;pch.cex=1.95;pch=3;cl.pos = 'r'; #cl.offset=2;cl.length=5;cl.cex = 1.3;pch.cex=1.95;pch=14;
# if (switch==1) {rbo=rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) } else if (switch==0  | switch==2) {rbo=rev(COL2('RdBu')[25:100])}
# col=colorRampPalette(c( 'white','orange'), alpha = TRUE)(150)
# resulta1=rango(resulta1,-1,1)
# if (min(as.matrix(resulta1))< -1  | max(as.matrix(resulta1))> 1) {resulta1=rango(resulta1,-1,1)}
corrplot(resulta1, type = type, order = order,method=method, p.mat=p.mat.a1, tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
         sig.level = c(0.05),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length,
         tl.srt = 90, diag = TRUE,col=col,is.corr = FALSE,col.lim=c(-heps,heps)) #only in age...0.001, -2,2 #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) col.lim=c(min(resulta1),max(resulta1)) col.lim=c(-heps,heps)
# 
dev.off(); eoh=paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
## png 
##   2
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path)
  
  
# ACME:n kaa ei jakaudu symmetrisesti

#Copilotointi ei oikein toimi edelliselle..  
knitr::include_graphics('Square Correlation Plot of_korkeatACME PFAS vs. steroids_ for the hypos_colors_stea 0 All transpose .jpg')
Heatmap of 'Indirect' Effects PFAS vs. Steroids with All Subjects

Heatmap of ‘Indirect’ Effects PFAS vs. Steroids with All Subjects

knitr::include_graphics('Square Correlation Plot of_korkeatACME PFAS vs. steroids_ for the hypos_colors_stea 1 All transpose .jpg')
Heatmap of 'Direct' Effects PFAS vs. Lipids and Bile Acids with All Subjects

Heatmap of ‘Direct’ Effects PFAS vs. Lipids and Bile Acids with All Subjects

knitr::include_graphics('Heatmap of high ACME tikka111124 idir .jpg')
Heatmap of 'Indirect' Effects Steroids vs. Bile Acids and Lipids with All Subjects

Heatmap of ‘Indirect’ Effects Steroids vs. Bile Acids and Lipids with All Subjects

Making Sankey Diagrams

#To do these diagrams, you need to have a reduced dataset as well as a function that acounts the group (male/female)
reduced2=function(u3,Group,name,lkm,d) {
  c1=c() # u3=all_all1
  ACMEMedian=c();ACMEpval=c();ACMEVar=c()
  ADEMedian=c();ADEpval=c();ADEVar=c()
  c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
  ACMEMedian=0 #median(c1[,'ACME'][c1[,'ACME']>0])
  # c1=c1[order(c1[,'ACME']),];  #for 'negative' acmes
  c1=c1[rev(order(c1[,'ACME'])),];  
  # c1=c1[c1[,'ACME']<ACMEMedian & ((c1[,'ADE']-c1[,'ACME']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ]
  c1=c1[c1[,'ACME'] > ACMEMedian & ((c1[,'ACME']-c1[,'ADE']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ] 
  
  c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
  
  write.xlsx(c1, file = paste(name,Group,date,'.xlsx'), append = FALSE, row.names = TRUE)
  
  return(c1)}

plottings_sf=function(uh7ma,date,sick,Group,d) {
  # uh7ma = na.omit(c1)
  rt2=uh7ma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model# https://github.com/MarioniLab/miloR# https://www.nature.com/articles/s41467-023-40458-9/figures/4
  name=paste('Contaminants_Steroids_BAs_or_Lipids_sims',date) # rtot=rtot_2000_mrct # rtot=uh5
  
  hoi=c(); 
  if (d=='t') {hoi=scan(text=rt2[,1] , what=" ")} else {hoi=scan(text=rownames(rt2) , what=" ")} #rownames(rt2)# names(rt2[,1]) rownames(rt2)
  # hoi=scan(text=rownames(rt2) , what=" ")#rownames(rt2)# names(rt2[,1]) rownames(rt2)
  hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) #Check this number (ncol) 3/4
# hoi=cbind(hoi[,1:3],rt2[,2])
  hoi=hoi[,1:3]
  # colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight')#,'Gender') ##
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')
  #,'Gender') ##https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
  
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
  hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
  hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  # df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids','Weight') #see the sankey test file 17.10.24 for this...
  
  df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids') 
  
  #In case you need to print to computer: meda='Sankey plot of ', pdf(paste(meda,name,sick,Group,".pdf"), width = 20, height = 20,  pointsize = 18);
  # print(ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node,fill = factor(node),label = node)) +geom_sankey(flow.alpha = 0.5, node.color = 1) +        geom_sankey_label(size = 5.5, color = 1, fill = "white") +scale_fill_viridis_d() + theme_sankey(base_size = 30) + theme(legend.position = "none")+
  #theme(axis.title.x = element_blank()));dev.off()
  
windowsFonts(A = windowsFont("Calibri (Body)")) 

p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node,fill = factor(node),label = node)) +
          geom_sankey(flow.alpha = 1, node.color = 1) +
          # geom_sankey_label(size = 4.0, color = 1, fill = "white") + #
          geom_sankey_label(size = 8.0, color = 1, fill = "white")+
          scale_fill_viridis_d(option = "H", alpha = 0.75) +
          # scale_fill_viridis_c(option = "turbo")+
          # theme_sankey(base_size = 23) + #
          theme_sankey(base_size = 28) + theme(legend.position = "none") +
          # scale_fill_grey(start = 0.5, end = 0.5)+
          theme(axis.text.x = element_text(hjust = 0.5, vjust=7,colour = 'black') )+ #https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
          theme(axis.title.x = element_blank())


# Nicer colors than grey:
# p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node,fill = factor(node),label = node)) +geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 3.5, color = 1, fill = "white") +
# scale_fill_viridis_d() + theme_sankey(base_size = 16) + theme(legend.position = "none")+theme(axis.title.x = element_blank());

library(ragg)
# Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/" #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
pngfile <- fs::path(path,paste0(Group,'e',d,".png"))#fs::path(knitr::fig_path(),  "theming2.png")
# agg_png(pngfile, width = 30, height = 40, units = "cm", res = 300,scaling = 2) #
agg_png(pngfile, width = 60, height = 80, units = "cm", res = 600,scaling = 2)
plot(p)
invisible(dev.off())
knitr::include_graphics(pngfile)
eoh=paste0(Group,'e',d,".png"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))



}

library(readxl)
# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic All tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
all_Female=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic Female tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
all_Male=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic Male tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);

# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/")
sick='all samples';d='t'
lkm=30;Group='All'; name='just alala';date=paste0(date,'_allds')#dim(all_all)[1];
alma=reduced2(all_all1,Group,name,lkm);
alma = na.omit(alma)
plottings_sf(alma,date,sick,Group,d)

date=paste0(date,'_femalads');name='just alala';Group='female';
almaf=reduced2(all_Female,Group,name,lkm); #all_Female
almaf = na.omit(almaf)
plottings_sf(almaf,date,sick,Group,d)

date=paste0(date,'_malads');name='just alaal';Group='male';
almam=reduced2(all_Male,Group,name,lkm);
almam = na.omit(almam) #https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
plottings_sf(almam,date,sick,Group,d)


#Alternative way to plot sankeys with the ACMEs:
#Create data which can be used for Sankey
set.seed(111);theme_set(theme_light())

#Trying to imitate the approach below:
u3=all_all1; d='t'
c1=c()
ACMEMedian=c();ACMEpval=c();ACMEVar=c()
ADEMedian=c();ADEpval=c();ADEVar=c()
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
# ACMEMedian=0#median(c1[,'ACME'][c1[,'ACME']>0])
c1=c1[rev(order(c1[,'ACME'])),];  
c1=c1[((c1[,'ACME']-c1[,'ADE']) > 0), ] #c1[,'ACME']>ACMEMedian & 
c1=c1[c1[,'d0.p']<0.05, ] # c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
uh7ma = na.omit(c1)
rt2=uh7ma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model# https://github.com/MarioniLab/miloR# https://www.nature.com/articles/s41467-023-40458-9/figures/4
name=paste('Contaminants_Steroids_BAs_or_Lipids_sims',date) # rtot=rtot_2000_mrct # rtot=uh5

hoi=c(); 
if (d=='t') {hoi=scan(text=rt2[,1] , what=" ")} else {hoi=scan(text=rownames(rt2) , what=" ")} #rownames(rt2)# names(rt2[,1]) rownames(rt2)
# hoi=scan(text=rownames(rt2) , what=" ")#rownames(rt2)# names(rt2[,1]) rownames(rt2)
hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) #Check this number (ncol) 3/4
hoi=cbind(hoi[,1:3],rt2[,2])
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight')#,'Gender') ##https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot

hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  
df22 <-
  hoi |>
  subset(Weight > -1.05) |>
  pivot_stages_longer(c("Contaminants", "Steroids", "Bile Acids or Lipids"), "Weight", "Bile Acids or Lipids")
pos <- position_sankey(v_space = "auto", order = "ascending", align = "justify")
p <-
  ggplot(data    = df22,mapping = aes(x = stage, y = Weight, group = node,
                  edge_id = edge_id, connector = connector))+ theme_sankey(base_size = 28) + #theme(legend.position = "none") +
  # scale_fill_grey(start = 0.5, end = 0.5)+
  theme(axis.text.x = element_text(hjust = 0.5, vjust=7,colour = 'black') )+ #https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
  theme(axis.title.x = element_blank())


pos <- position_sankey(v_space = "auto", order = "descending")
p + geom_sankeyedge(aes(fill = Weight), position = pos) +
  geom_sankeynode(position = pos,fill = "#dfe0e6") +
  geom_text(aes(label = node), stat = "sankeynode", position = pos, cex = 2)+scale_fill_viridis_c(option = "turbo")

# In case you need something else from somewhere else...
# setwd("C:/Users/patati/Desktop/Turku/R/hypo4/HOMAIR/perus_ok")
# all_all=read_xlsx(path = "100 hypo4_yes_sick All tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=read_xlsx(path = "100 hypo4_yes_sick Female tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=read_xlsx(path = "100 hypo4_yes_sick Male tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);

# path="C:/Users/patati/Desktop/Turku/R/basic_cova/All"
# # setwd("C:/Users/patati/Desktop/Turku/R/basic_cova/All")
# files=list.files(path=path, pattern=".RData", all.files=TRUE, full.names=TRUE) #https://www.geeksforgeeks.org/read-all-files-in-directory-using-r/
# list_of_files <- list() 
# for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# names(list_of_files) <- str_remove(list.files(path=path, pattern=".RData", all.files=TRUE, full.names=FALSE),'.RData')  #https://stringr.tidyverse.org/reference/str_remove.html
# 
# # names(list_of_files)[1:3]
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/")
# all_all1=list_of_files[[1]] #all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=list_of_files[[2]] #all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=list_of_files[[3]] #all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);
# 
# d='nt';lkm=30;Group='All'; name='just all'
# alma=reduced2(all_all1,Group,name,lkm);alma = na.omit(alma)
# plottings_sf(alma,date,sick,Group,d)
# 
# name='just all';Group='female';
# almaf=reduced2(all_Female,Group,name,lkm); almaf = na.omit(almaf)
# plottings_sf(almaf,date,sick,Group,d)
# 
# name='just all';Group='male';
# almam=reduced2(all_Male,Group,name,lkm); almam = na.omit(almam) #https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# plottings_sf(almam,date,sick,Group,d)

#Ei tääkään ihan nyt mee copilotilla paremmaks.

Other Handy Functions

# Demographics:
# This is for nonscaled (or not-logged or the like 'raw') data only!

# Demographics Function
Demographics <- function(tv_all, Clini, Group) {
  # Determine the condition based on the group
  cond <- switch(Group,
                 'female' = tv_all[,'SEX.1F.2M'] == min(tv_all[,'SEX.1F.2M']),
                 'male' = tv_all[,'SEX.1F.2M'] == max(tv_all[,'SEX.1F.2M']),
                 'All' = rep(TRUE, nrow(tv_all)))
  
  # Filter the data based on the condition
  tv_red <- tv_all[cond, ]
  Clini2 <- Clini[rownames(tv_red), ]
  
  # Calculate demographics
  N <- nrow(tv_red)
  AGE <- median(tv_red[,'AGE'])
  AGEq1 <- quantile(tv_red[,'AGE'], 0.25)
  AGEq3 <- quantile(tv_red[,'AGE'], 0.75)
  BMI <- median(tv_red[,'BMI'])
  BMIq1 <- quantile(tv_red[,'BMI'], 0.25)
  BMIq3 <- quantile(tv_red[,'BMI'], 0.75)
  PFAS <- median(tv_red[,'PFAS'])
  PFASq1 <- quantile(tv_red[,'PFAS'], 0.25)
  PFASq3 <- quantile(tv_red[,'PFAS'], 0.75)
  HDL <- median(Clini2[,'HDL'])
  HDLq1 <- quantile(Clini2[,'HDL'], 0.25)
  HDLq3 <- quantile(Clini2[,'HDL'], 0.75)
  LDL <- median(as.numeric(Clini2[,'LDL']))
  LDLq1 <- quantile(as.numeric(Clini2[,'LDL']), 0.25)
  LDLq3 <- quantile(as.numeric(Clini2[,'LDL']), 0.75)
  SGmin <- min(tv_red[,'Steatosis.Grade.0.To.3'])
  SGmax <- max(tv_red[,'Steatosis.Grade.0.To.3'])
  FSmin <- min(tv_red[,'Fibrosis.Stage.0.to.4'])
  FSmax <- max(tv_red[,'Fibrosis.Stage.0.to.4'])
  NFmin <- min(tv_red[,'Necroinflammation'])
  NFmax <- max(tv_red[,'Necroinflammation'])
  HImin <- min(tv_red[,'HOMA-IR'])
  HImax <- max(tv_red[,'HOMA-IR'])
  HI <- median(tv_red[,'HOMA-IR'])
  HIq1 <- quantile(tv_red[,'HOMA-IR'], 0.25)
  HIq3 <- quantile(tv_red[,'HOMA-IR'], 0.75)
  
  return(c(N, AGE, AGEq1, AGEq3, BMI, BMIq1, BMIq3, PFAS, PFASq1, PFASq3, HDL, HDLq1, HDLq3, LDL, LDLq1, LDLq3,
           SGmin, SGmax, FSmin, FSmax, NFmin, NFmax, HImin, HImax, HI, HIq1, HIq3))
}

# Calculate demographics for each group
Group <- 'All'; d_all <- Demographics(tv, Clini, Group)
Group <- 'female'; d_female <- Demographics(tv, Clini, Group)
Group <- 'male'; d_male <- Demographics(tv, Clini, Group)

# Combine results into a matrix
d_totaali <- t(rbind(d_all, d_female, d_male))
rownames(d_totaali) <- c('N', 'AGE', 'AGEq1', 'AGEq3', 'BMI', 'BMIq1', 'BMIq3', 'PFAS', 'PFASq1', 'PFASq3', 'HDL', 'HDLq1', 'HDLq3', 'LDL', 'LDLq1', 'LDLq3',
                         'SGmin', 'SGmax', 'FSmin', 'FSmax', 'NFmin', 'NFmax', 'HImin', 'HImax', 'HI', 'HIq1', 'HIq3')
d_totaali
##              d_all   d_female     d_male
## N      104.0000000 69.0000000 35.0000000
## AGE     49.0000000 46.0000000 52.0000000
## AGEq1   43.0000000 43.0000000 44.5000000
## AGEq3   54.2500000 53.0000000 58.0000000
## BMI     45.2960308 45.1082579 46.2603878
## BMIq1   41.0973732 41.1066653 41.3139309
## BMIq3   49.0881549 48.7197232 49.6020327
## PFAS     0.4114301  0.3542222  0.4797220
## PFASq1   0.2846164  0.2517962  0.4114301
## PFASq3   0.5679388  0.5326427  0.7002281
## HDL      1.1000000  1.1300000  1.0100000
## HDLq1    0.9575000  0.9700000  0.8700000
## HDLq3    1.2825000  1.3600000  1.1650000
## LDL      2.3000000  2.5000000  1.9000000
## LDLq1    1.6000000  1.9000000  1.3500000
## LDLq3    2.9000000  3.0000000  2.4000000
## SGmin    0.0000000  0.0000000  0.0000000
## SGmax    3.0000000  3.0000000  1.0000000
## FSmin    0.0000000  0.0000000  0.0000000
## FSmax    4.0000000  3.0000000  4.0000000
## NFmin    0.0000000  0.0000000  0.0000000
## NFmax    2.0000000  1.0000000  2.0000000
## HImin    0.2426667  0.7822222  0.2426667
## HImax   14.5244444 14.5244444 10.7555556
## HI       3.1257778  3.0417778  3.9200000
## HIq1     1.7556667  1.6333333  2.2644444
## HIq3     4.5747778  3.8666667  5.4240000
# Sample Size Functions
# Function to determine sample sizes for binary outcomes
sample_size <- function(NAFLD, Outcome, Group) { 
  NAFLDo <- switch(Group,
                   'Male' = NAFLD[NAFLD[,'SEX.1F.2M'] == 2, ],
                   'Female' = NAFLD[NAFLD[,'SEX.1F.2M'] == 1, ],
                   'All' = NAFLD)
  
  n0 <- nrow(NAFLDo[NAFLDo[, Outcome] == 0, ])
  n1 <- nrow(NAFLDo[NAFLDo[, Outcome] > 0, ])
  
  return(c(n0, n1))
}

# Function to determine sample sizes for continuous outcomes
sample_size2 <- function(NAFLD, Outcome, Group, sick_groupe) { 
  sample_data <- c()
  for (i in 1:2) {
    NAFLDo <- switch(Group,
                     'Male' = NAFLD[NAFLD[,'SEX.1F.2M'] == 2 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'Female' = NAFLD[NAFLD[,'SEX.1F.2M'] == 1 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'All' = NAFLD[(i == 1 & !sick_groupe | i == 2 & sick_groupe), ])
    sample_data <- c(sample_data, nrow(NAFLDo))
  }
  return(sample_data)
}

# Prepare NAFLD data
NAFLD <- tv[, 1:28]
NAFLD[NAFLD[, 5] > 0, 5] <- 1
NAFLD[NAFLD[, 6] > 0, 6] <- 1
NAFLD[NAFLD[, 7] > 0, 7] <- 1
NAFLD[NAFLD[, 8] <= 1.5, 8] <- 0
NAFLD[NAFLD[, 8] > 1.5, 8] <- 1
colnames(NAFLD) <- gsub("-", ".", colnames(NAFLD))
colnames(NAFLD) <- gsub("/", ".", colnames(NAFLD))
colnames(NAFLD) <- gsub("11", "X11", colnames(NAFLD))
colnames(NAFLD) <- gsub("17", "X17", colnames(NAFLD))
colnames(NAFLD) <- gsub("#", ".", colnames(NAFLD))
colnames(NAFLD)[colnames(NAFLD) == 'X17aOH.P4'] <- 'X17.aOHP4'

# Calculate sample sizes for each outcome and group
jappend <- c()
outcomes <- c('Steatosis.Grade.0.To.3', 'Fibrosis.Stage.0.to.4', 'Necroinflammation', 'HOMA.IR')
groups <- c('All', 'Female', 'Male')

for (Outcome in outcomes) {
  for (Group in groups) {
    jappend <- c(jappend, sample_size(NAFLD, Outcome, Group))
  }
}

matrix(unlist(jappend), ncol = 8)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]   83   11   62   26   88    9   18   56
## [2,]   21   25   42   19   16   28   86    5
## [3,]   58   10   43   16   60    7   13   30
# Mean and Quantile Calculation Function
mean_q1q3 <- function(Group) {
  cond <- switch(Group,
                 'Female' = tv_all[,'Gender'] == min(tv_all[,'Gender']),
                 'Male' = tv_all[,'Gender'] == max(tv_all[,'Gender']),
                 'All' = rep(TRUE, nrow(tv_all)))
  
  tv_red <- tv[cond, ]
  M <- tv_red[, Mediator]
  
  cM <- round(apply(M, 2, median, na.rm = TRUE), 0)
  quants <- c(0.25, 0.75)
  csd <- round(apply(M, 2, quantile, probs = quants, na.rm = TRUE), 0)
  
  tot <- rbind(cM, csd)
  return(tot)
}

totQ <- mean_q1q3('All')
femQ <- mean_q1q3('Female')
menQ <- mean_q1q3('Male')
tot3 <- cbind(t(totQ), t(femQ), t(menQ))
print(tot3)
##             cM   25%   75%    cM   25%   75%    cM   25%   75%
## E        40685 33628 45755 40538 30857 45250 42424 37906 49608
## 11-KT      926   617  1161   925   606  1163   942   759  1087
## 17a-OHP5  1613   924  3290  1615   776  3293  1612  1190  3050
## S          843   560  1134   810   533  1050   892   576  1346
## B         7258  5046 12538  7545  5054 12526  6495  5085 12852
## 11b-OHA4  8870  6700 11868  9461  6379 11406  8663  7510 12528
## 11-KDHT    100   100   100   100   100   100   100   100   153
## 11-KA4    1262   845  1687  1262   838  1663  1318   890  1699
## DHEA      8294  5602 12330  9395  6072 13100  6683  5301  9524
## T/Epi-T   1225   838  8089   913   717  1216 10808  8204 13367
## 17a-OHP4  1121   631  1717   830   527  1510  1510  1076  1839
## AN         250   250   507   250   250   505   250   250   410
## DHT        276   132   461   203   109   311   524   381   632
## A4        2226  1548  3064  2366  1578  3276  1995  1539  2424
## DOC         62    47    90    60    48    96    67    44    87
## P5        2091  1288  4116  2375  1370  4172  1584  1120  2464
## P4         124    79   262   160    92   586    94    75   146
## A         1105   685  1827  1291   925  2004   722   443  1142
## E2         137    77   271   167    66   441   117   104   175
## E1         271   170   407   321   193   482   232   166   285
#Above goes, but not below...

# I wanted to check the steroids with highest ACMEs between healthy (all, all) and sick (all, all).
# I needed to have the cutoffs and ways to see the overlaps. For that, I developed a function called 'the_combos'
#Some data is needed... :)
path="C:/Users/patati/Desktop/Turku/R/hypo_basic/Tiedostot/"; setwd(path)
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
names(list_of_files) <- files #https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
library(stringr)

the_combos=function(list_of_files,Group,cond) { #cond='' vastaa kaikkia
#General categories of females or males (without 'All, 'MASLD, and 'Menopause')
u <- names(list_of_files)
a <- Group #note the writing, yes with " " in between. " female"  or " male" or " all"
ie=grep(a,u,fixed=TRUE); u2=u[ie]
del=c(grep("All",u2,fixed=TRUE),grep("MASLD",u2,fixed=TRUE),grep("Menopause",u2,fixed=TRUE))
yl=1:length(u2); lop=yl[!yl %in% del]
u=u2[lop] #general male
ie=grep(cond,u,fixed=TRUE); u=u[ie]
a <- "sick"; ie=grep(a,u,fixed=TRUE); u3=u[ie]# sick ones, male or females
u_sick=list_of_files[u3]#https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
a='healthy';ie=grep(a,u,fixed=TRUE);u4=u[ie]
u_healthy=list_of_files[u4] # u_healthy=list_of_files[grep(a,u,fixed=TRUE)] 
tcross=function(u_sick) { 
  all_names = c(); i=1
  for (i in (1:length(u_sick))) { #length(u_sick) is 18
    us=u_sick[[i]]
    aux=c();
    if (dim(us)[1]>200) {aux=200} else {aux=dim(us)[1]}
    # plot(1:aux,as.numeric(us[1:aux,1]))
    values=(1:(length(as.numeric(us[,1]))-1))
    coo=c(); z=0
    for (z in values) {coo=append(coo,abs(us[z,1]-us[(z+1),1]))}  
    pss=which(coo>quantile(coo,0.95));ro=round(length(coo)/3)# pss[pss<ro]#round(length(which(coo>quantile(coo,0.95)))/2)]
    dpp=diff(pss[pss<ro]) #https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
    dpp_sort=sort(dpp,decreasing = TRUE)
    if (length(dpp_sort)<5) { for_comp=length(dpp)+1} else {
      if (sum(dpp_sort[1:5]>5)==5) {cf=dpp_sort[5];cff=which(dpp>cf)[1]} else {cf=dpp_sort[2];cff=which(dpp>cf)[1]}
      cff=which(dpp>(cf-1))[1]
      for_comp=pss[cff]; }
    
    if (aux<30) {for_comp  =max(which(as.vector(us[,1]>0)))}
    if (aux>30) {if (max(pss[pss<ro])<30) (for_comp=30)} #due the small amount of good ones that can be like 4...
    
    rt2=us[1:for_comp,]; j=0
    hoi=c();for (j in 1:dim(rt2)[1]) {hoi=append(hoi,scan(text=rownames(rt2)[j], what=""))}
    hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
    hoi[,2] <- gsub("\\.", "-",  hoi[,2] ) #:)
    xz=round(quantile(table(hoi[,2]),0.25)); if (xz<2) {xz=0} else {xz=xz} #let's start with 25% and if not ok go to like 5%
    names=c();names=names(table(hoi[,2])[table(hoi[,2])>xz]) 
    # print(all_names)
    all_names=append(names,all_names)
  }
  return(sort(table(all_names),decreasing = TRUE))  
}
tc_sick=tcross(u_sick);tc_healthy=tcross(u_healthy) # table(the_cross)# hist(table(the_cross),breaks=10)
cae1=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[2]
if (max(tc_sick)==cae1 | max(tc_sick)==cae2)
cfn=cae2-1 #sometimes as with females no differences, i.. tc_sick;rev(as.numeric(names(table(tc_sick))))[2]-1#
if (is.na(cfn)) {steroid_sick=names(tc_sick)} else {steroid_sick=names(tc_sick[tc_sick>(cfn)])}
cae1=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[2]
if (max(tc_healthy)==cae1 | max(tc_healthy)==cae2)
cfn=cae2-1 
if (is.na(cfn)) {steroid_healthy=names(tc_healthy)} else {steroid_healthy=names(tc_healthy[tc_healthy>(cfn)])}
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
tbe=intersect(steroid_healthy, steroid_sick)
totaali_sh_all=steroid_sick[!steroid_sick %in% tbe] #https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/ 
return(list(steroid_sick,tbe,totaali_sh_all)) } #"17a-OHP4" "DHT"      "DOC"      'P4'

Group = ' all'; cond='';all_all=the_combos(list_of_files,Group,cond) #ekat allit ('All...') oli deletoitu, yes, ja käytetty vain spesifisiä alleja...
Group = ' female'; cond='';female_all=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='';male_all=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Steatosis';all_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Steatosis';female_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Steatosis';male_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Fibrosis';all_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Fibrosis';female_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Fibrosis';male_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Necroinflammation';all_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Necroinflammation';female_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Necroinflammation'; male_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='HOMAIR'; all_HOMAIR=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='HOMAIR'; female_HOMAIR=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='HOMAIR'; male_HOMAIR=the_combos(list_of_files,Group,cond) 

pottees=c(all_all[3],female_all[3],male_all[3],
            all_steatosis[3],female_steatosis[3],male_steatosis[3],
            all_Fibrosis[3],female_Fibrosis[3],male_Fibrosis[3],
            all_Necroinflammation[3],female_Necroinflammation[3],male_Necroinflammation[3],
            all_HOMAIR[3],female_HOMAIR[3],male_HOMAIR[3])

# In case you need to print the above list. This is good for printing list of list in anyways:
# mylist <- pottees; file <- paste0("myfile_ok",date,".txt"); conn <- file(description=file, open="w")
# newlist <- lapply(seq_len(length(mylist)), function(i){
#   lapply(seq_len(length(mylist[[i]])), function(j) {
#     temp <- c(i, j, mylist[[i]][[j]])
#     writeLines(text=paste(temp, collapse=","), con=conn, sep="\r\n")  
#   }) }); close(conn)

# So this will give the most common steroids in all cases compared to all cases in all the subjects (all, female, male)
table(unlist(pottees))[rev(order(table(unlist(pottees))))]
## 
##      DOC       AN     MUFA       E1 17a-OHP4  11-KDHT   TG_SFA  TCDCA_L 
##       13       13       10        9        8        7        6        6 
##   GHCA_L      Cer       A4   TLCA_L       PI   Hexcer  GCDCA_L       DG 
##        6        6        6        5        5        5        5        5 
## 11b-OHA4       PE   THCA_L      LPC  TDHCA_L   TDCA_L  TbMCA_L       PC 
##        5        4        3        3        2        2        2        2 
##    GCA_L    DCA_L   CDCA_L    11-KT   UDCA_L  GUDCA_L   GLCA_L  GHDGA_L 
##        2        2        2        2        1        1        1        1 
##       CE     C4_L 
##        1        1
# For comparing the PFAS/steroid/BA (or lipid) in healthy and sick mediation:
# Load all the variables in the folder:
setwd("C:/Users/patati/Desktop/Turku/R/hypo4/Tiedostot/") #check this if needed...
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files:
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
names(list_of_files) <- files #ht
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r

comp_med_two=function(list_of_files) { #I made this a function, since this kind of cross comparison could be handy also in other contexts
health=c();sickness=c()
for (i in 1:length(names(list_of_files))) if (str_detect(names(list_of_files)[i],'healthy')) {health=append(health,list_of_files[i])} else if (str_detect(names(list_of_files)[i],'sick')) {sickness=append(sickness,list_of_files[i])} 
health2=c();sickness2=c()
i=0;for (i in 1:length(c(health))){health[[i]][rev(order(health[[i]][,1])),];health2=rbind(health2,health[[i]][1:10,])}
i=0;for (i in 1:length(c(sickness))){sickness[[i]][rev(order(sickness[[i]][,1])),];sickness2=rbind(sickness2,sickness[[i]][1:10,])}
health2=cbind(rownames(health2),health2);sickness2=cbind(rownames(sickness2),sickness2)
colnames(health2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
colnames(sickness2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
rownames(health2)=str_replace_all(rep( names(health),each = 10), ".RData", "")
rownames(sickness2)=str_replace_all(rep( names(sickness),each = 10), ".RData", "")
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
# https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
hoi=c(); hoi=scan(text=health2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.'); hoi_healthy=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')]
hoi=c(); hoi=scan(text=sickness2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.');
hoi_sick=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')] ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
return(list(hoi_sick,hoi_healthy))}

cmt=comp_med_two(list_of_files);
hoi_sick=cmt[[1]];hoi_healthy=cmt[[2]]

# So the differences between contaminants (in the sick vs. healthy 'mediation') are:
table(hoi_sick[,1])[rev(order(table(hoi_sick[,1])))];table(hoi_healthy[,1])[rev(order(table(hoi_healthy[,1])))]
## 
##           PFOA          PFHpA          PFHxA          PFHxS PFHxA_Branched 
##             37             35             33             29             20 
##           PFNA           PFOS 
##             16             10
## 
##          PFHpA           PFOS          PFHxA           PFNA PFHxA_Branched 
##             81             37             27             15             12 
##          PFHxS 
##              8
# Differences Between Steroids...
# table(hoi_sick[,2])[rev(order(table(hoi_sick[,2])))];table(hoi_healthy[,2])[rev(order(table(hoi_healthy[,2])))]
# Differences Between BAs/Lipids...
# table(hoi_sick[,3])[rev(order(table(hoi_sick[,3])))];table(hoi_healthy[,3])[rev(order(table(hoi_healthy[,3])))]

Disclaimer

# This is a part of on-going research work that has not been published yet and I cannot take a responsibility if something above is not working/ok in your system/research/etc. in 100% accuracy. 
# Furthermore, I am inbound to update these sites relatively often so a grain of salt is needed when reading these... :)
# Please note also that most of the plotting functions have bee commented out so that their execution would not take time when compiling as well as due quality reasons: Rmarkdown print is not as good in the screen and in final (html) document as the similar with knitr's 'include_graphics' that shows the full scale image.
# In addition, no sensitive data has been shared and you see only 'examples' here. Moreover, I am not an expert on biology (or even at computing per se) and may not know your specific biochemical and/or mechanical questions that you maybe using this for. 
# Not to mention few, I just want to say that I am just doing the analysis for understanding the data partly on its own right with the context as mentioned above and partly as a starting 'side project'.
# Thanks and sorry for possible inconveniences in advance! :)

About R Setups

# As per this date, see above or below, 
thedate 
## [1] "230125"
# all the above codes work to produce results with the data files given.
# All the packages have been copied to local drive (12.9.24), i.e. the content of '.libPaths()'
# R is 'R version 4.3.1 (2023-06-16 ucrt)' (with 'version'/'getRversion()' command)
# And RStudio is '2024.4.0.735' (with 'RStudio.Version()' command)
# In addition, the following information regarding the setups is given:
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Finnish_Finland.utf8  LC_CTYPE=Finnish_Finland.utf8   
## [3] LC_MONETARY=Finnish_Finland.utf8 LC_NUMERIC=C                    
## [5] LC_TIME=Finnish_Finland.utf8    
## 
## time zone: Europe/Helsinki
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices datasets  utils    
## [8] methods   base     
## 
## other attached packages:
##   [1] xlsx_0.6.5                  viridis_0.6.5              
##   [3] viridisLite_0.4.2           tufte_0.13                 
##   [5] tint_0.1.4                  superb_0.95.19             
##   [7] sjPlot_2.8.17               scatterplot3d_0.3-44       
##   [9] scater_1.34.0               scuttle_1.16.0             
##  [11] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
##  [13] Biobase_2.66.0              GenomicRanges_1.58.0       
##  [15] GenomeInfoDb_1.42.1         IRanges_2.40.0             
##  [17] S4Vectors_0.44.0            BiocGenerics_0.52.0        
##  [19] MatrixGenerics_1.18.0       matrixStats_1.4.1          
##  [21] scales_1.3.0                rstatix_0.7.2              
##  [23] rmdformats_1.0.4            rmarkdown_2.29             
##  [25] rgl_1.3.14                  reshape2_1.4.4             
##  [27] remotes_2.5.0               readxl_1.4.3               
##  [29] rcompanion_2.4.36           RColorBrewer_1.1-3         
##  [31] ragg_1.3.3                  qpgraph_2.40.0             
##  [33] quantreg_5.99.1             SparseM_1.84-2             
##  [35] psych_2.4.6.26              prettydoc_0.4.1            
##  [37] ppcor_1.1                   plotrix_3.8-4              
##  [39] plyr_1.8.9                  pathviewr_1.1.7            
##  [41] PerformanceAnalytics_2.0.4  xts_0.14.1                 
##  [43] pheatmap_1.0.12             MOFA2_1.16.0               
##  [45] mlma_6.3-1                  gplots_3.2.0               
##  [47] coxme_2.2-22                bdsmatrix_1.3-7            
##  [49] survival_3.7-0              abind_1.4-8                
##  [51] mgcv_1.9-1                  nlme_3.1-166               
##  [53] meta_8.0-1                  metadat_1.2-0              
##  [55] mediation_4.5.0             sandwich_3.1-1             
##  [57] mvtnorm_1.3-2               MASS_7.3-61                
##  [59] mdatools_0.14.2             Maaslin2_1.16.0            
##  [61] magrittr_2.0.3              magick_2.8.5               
##  [63] lsr_0.5.2                   lme4_1.1-35.5              
##  [65] lmtest_0.9-40               zoo_1.8-12                 
##  [67] lavaan_0.6-19               insight_1.0.0              
##  [69] igraph_2.1.1                hrbrthemes_0.8.7           
##  [71] Hmisc_5.2-1                 glmnet_4.1-8               
##  [73] Matrix_1.7-0                ggtext_0.1.2               
##  [75] ggh4x_0.2.8                 ggsankeyfier_0.1.8         
##  [77] ggsankey_0.0.99999          ggpubr_0.6.0               
##  [79] ggplotify_0.1.2             ggforestplot_0.1.0         
##  [81] ggforce_0.4.2               ggcorrplot_0.1.4.1         
##  [83] FSA_0.9.5                   fs_1.6.5                   
##  [85] extrafont_0.19              effsize_0.8.1              
##  [87] dmetar_0.1.0                datarium_0.1.0             
##  [89] daiR_1.0.1                  corrplot_0.95              
##  [91] correlation_0.8.6           ComplexHeatmap_2.22.0      
##  [93] circlize_0.4.16             censReg_0.5-38             
##  [95] maxLik_1.5-2.1              miscTools_0.6-28           
##  [97] car_3.1-3                   carData_3.0-5              
##  [99] brickster_0.2.5             binilib_0.2.0              
## [101] lubridate_1.9.3             forcats_1.0.0              
## [103] stringr_1.5.1               dplyr_1.1.4                
## [105] purrr_1.0.2                 readr_2.1.5                
## [107] tidyr_1.3.1                 tibble_3.2.1               
## [109] ggplot2_3.5.1               tidyverse_2.0.0            
## [111] bigsnpr_1.12.18             bigstatsr_1.6.1            
## 
## loaded via a namespace (and not attached):
##   [1] graph_1.84.0              Formula_1.2-5            
##   [3] zlibbioc_1.52.0           fpc_2.2-13               
##   [5] tidyselect_1.2.1          bit_4.5.0.1              
##   [7] doParallel_1.0.17         clue_0.3-66              
##   [9] lattice_0.22-6            rjson_0.2.23             
##  [11] blob_1.2.4                rngtools_1.5.2           
##  [13] S4Arrays_1.6.0            parallel_4.4.1           
##  [15] png_0.1-8                 cli_3.6.3                
##  [17] bayestestR_0.15.0         sjstats_0.19.0           
##  [19] askpass_1.2.1             openssl_2.2.2            
##  [21] gargle_1.5.2              textshaping_0.4.0        
##  [23] BiocIO_1.16.0             kernlab_0.9-33           
##  [25] BiocNeighbors_2.0.1       basilisk.utils_1.18.0    
##  [27] uwot_0.2.2                curl_6.0.1               
##  [29] mime_0.12                 evaluate_1.0.1           
##  [31] coin_1.4-3                stringi_1.8.4            
##  [33] backports_1.5.0           Exact_3.3                
##  [35] XML_3.99-0.17             httpuv_1.6.15            
##  [37] flexmix_2.3-19            AnnotationDbi_1.68.0     
##  [39] rappdirs_0.3.3            splines_4.4.1            
##  [41] nortest_1.0-4             mclust_6.1.1             
##  [43] getopt_1.20.4             jpeg_0.1-10              
##  [45] doRNG_1.8.6               pcaPP_2.0-5              
##  [47] glmmML_1.1.7              ggbeeswarm_0.7.2         
##  [49] rootSolve_1.8.2.4         lpSolve_5.6.22           
##  [51] arrow_17.0.0.1            DBI_1.2.3                
##  [53] HDF5Array_1.34.0          jquerylib_0.1.4          
##  [55] bigparallelr_0.3.2        withr_3.0.2              
##  [57] class_7.3-22              systemfonts_1.1.0        
##  [59] vwline_0.2-4              rtracklayer_1.66.0       
##  [61] htmlwidgets_1.6.4         flock_0.7                
##  [63] ggrepel_0.9.6             labeling_0.4.3           
##  [65] SparseArray_1.6.0         cellranger_1.1.0         
##  [67] DESeq2_1.46.0             DEoptimR_1.1-3-1         
##  [69] diptest_0.77-1            lmom_3.2                 
##  [71] annotate_1.84.0           reticulate_1.40.0        
##  [73] XVector_0.46.0            knitr_1.49               
##  [75] UCSC.utils_1.2.0          timechange_0.3.0         
##  [77] pbivnorm_0.6.0            foreach_1.5.2            
##  [79] fansi_1.0.6               caTools_1.18.3           
##  [81] qtl_1.70                  data.table_1.16.2        
##  [83] biglm_0.9-3               rhdf5_2.50.0             
##  [85] irlba_2.3.5.1             gridBezier_1.1-1         
##  [87] extrafontdb_1.0           DescTools_0.99.58        
##  [89] gridGraphics_0.5-1        lazyeval_0.2.2           
##  [91] yaml_2.3.10               crayon_1.5.3             
##  [93] later_1.4.1               tweenr_2.0.3             
##  [95] bigassertr_0.1.6          Rgraphviz_2.50.0         
##  [97] codetools_0.2-20          base64enc_0.1-3          
##  [99] GlobalOptions_0.1.2       sjlabelled_1.2.0         
## [101] KEGGREST_1.46.0           Rtsne_0.17               
## [103] shape_1.4.6.1             shinyBS_0.61.1           
## [105] Rsamtools_2.22.0          gdtools_0.4.1            
## [107] filelock_1.0.3            foreign_0.8-87           
## [109] pkgconfig_2.0.3           xml2_1.3.6               
## [111] mathjaxr_1.6-0            GenomicAlignments_1.42.0 
## [113] performance_0.12.4        xtable_1.8-4             
## [115] httr_1.4.7                rbibutils_2.3            
## [117] tools_4.4.1               prabclus_2.3-4           
## [119] beeswarm_0.4.0            htmlTable_2.4.3          
## [121] broom_1.0.7               checkmate_2.3.2          
## [123] rrapply_1.2.7             googleAuthR_2.0.2        
## [125] ggeffects_2.0.0           MatrixModels_0.5-3       
## [127] assertthat_0.2.1          digest_0.6.37            
## [129] optparse_1.7.5            numDeriv_2016.8-1.1      
## [131] bookdown_0.41             dir.expiry_1.14.0        
## [133] farver_2.1.2              tzdb_0.4.0               
## [135] yulab.utils_0.1.8         rpart_4.1.23             
## [137] glue_1.8.0                cachem_1.1.0             
## [139] polyclip_1.10-7           generics_0.1.3           
## [141] Biostrings_2.74.0         CompQuadForm_1.4.3       
## [143] mnormt_2.1.1              ScaledMatrix_1.14.0      
## [145] fontBitstreamVera_0.1.1   minqa_1.2.8              
## [147] httr2_1.0.7               ggstance_0.3.7           
## [149] utf8_1.2.4                sjmisc_2.8.10            
## [151] basilisk_1.18.0           gtools_3.9.5             
## [153] datawizard_0.13.0         ggsignif_0.6.4           
## [155] shiny_1.9.1               gridExtra_2.3            
## [157] GenomeInfoDbData_1.2.13   rhdf5filters_1.18.0      
## [159] RCurl_1.98-1.16           memoise_2.0.1            
## [161] gld_2.6.6                 fontLiberation_0.1.0     
## [163] renv_1.0.11               rmio_0.4.0               
## [165] netmeta_2.9-0             rstudioapi_0.17.1        
## [167] cluster_2.1.6             magic_1.6-1              
## [169] hms_1.1.3                 munsell_0.5.1            
## [171] cowplot_1.1.3             colorspace_2.1-1         
## [173] poibin_1.6                rlang_1.1.4              
## [175] quadprog_1.5-8            collapse_2.0.18          
## [177] xfun_0.49                 multcompView_0.1-10      
## [179] TH.data_1.1-2             e1071_1.7-16             
## [181] metafor_4.6-0             iterators_1.0.14         
## [183] modeltools_0.2-23         libcoin_1.0-10           
## [185] rJava_1.0-11              Rhdf5lib_1.28.0          
## [187] bigsparser_0.7.3          promises_1.3.2           
## [189] bitops_1.0-9              Rdpack_2.6.2             
## [191] RSQLite_2.3.9             proxy_0.4-27             
## [193] fgsea_1.32.0              DelayedArray_0.32.0      
## [195] compiler_4.4.1            beachmat_2.22.0          
## [197] boot_1.3-31               Rcpp_1.0.13-1            
## [199] Rttf2pt1_1.3.12           BiocSingular_1.22.0      
## [201] fontquiver_0.2.1          googleCloudStorageR_0.7.0
## [203] BiocParallel_1.40.0       gridtext_0.1.5           
## [205] R6_2.5.1                  multcomp_1.4-26          
## [207] fastmap_1.2.0             fastmatch_1.1-4          
## [209] vipor_0.4.7               rsvd_1.0.5               
## [211] nnet_7.3-19               gtable_0.3.6             
## [213] MuMIn_1.48.4              KernSmooth_2.23-24       
## [215] htmltools_0.5.8.1         bit64_4.5.2              
## [217] lifecycle_1.0.4           zip_2.3.1                
## [219] nloptr_2.1.1              restfulr_0.0.15          
## [221] xlsxjars_0.6.1            sass_0.4.9               
## [223] vctrs_0.6.5               plm_2.6-4                
## [225] robustbase_0.99-4-1       haven_2.5.4              
## [227] bslib_0.8.0               pillar_1.9.0             
## [229] GenomicFeatures_1.58.0    locfit_1.5-9.10          
## [231] expm_1.0-0                jsonlite_1.8.9           
## [233] GetoptLong_1.0.5

References

# Here are the reference links unceremoniously in order of appearance:

# https://stackoverflow.com/questions/43527520/r-markdown-yaml-scanner-error-mapping-values
# https://appsilon.com/imputation-in-r 
# https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
# https://readxl.tidyverse.org/articles/column-names.html
# https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-
# https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character 
# https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
# https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/ 
# https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-
# https://mdatools.com/docs/preprocessing--autoscaling.html
# https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
# https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
# https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
# https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender
# https://stackoverflow.com/questions/32539222/group-boxplot-data-while-keeping-their-individual-x-axis-labels-in-ggplot2-in-r
# https://stackoverflow.com/questions/34522732/changing-fonts-in-ggplot2
# https://stackoverflow.com/questions/37488075/align-axis-label-on-the-right-with-ggplot2
# http://www.sthda.com/english/wiki/ggplot2-rotate-a-graph-
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
# https://stackoverflow.com/questions/76758153/is-there-a-way-to-change-the-asterisks-to-match-custom-p-values 
# https://datavizpyr.com/horizontal-boxplots-with-ggplot2-in-r/
# https://stackoverflow.com/questions/72564551/a-custom-legend-unrelated-to-data-in-ggplot
# https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
# https://stackoverflow.com/questions/66429500/how-to-make-raggagg-png-device-work-with-ggsave
# https://stats.stackexchange.com/questions/237256/is-the-shapiro-wilk-test-only-applicable-to-smaller-sample-sizes  
# https://statisticsbyjim.com/hypothesis-testing/nonparametric-parametric-tests/
# https://www.statology.org/mean-standard-deviation-grouped-data/  
# https://amsi.org.au/ESA_Senior_Years/SeniorTopic4/4h/4h_2content_11.html  
# https://www.themathdoctors.org/mean-and-standard-deviation-of-grouped-data/
# https://stackoverflow.com/questions/29825537/group-categories-in-r-according-to-first-letters-of-a-string
# https://datatofish.com/create-dataframe-in-r/
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot
# http://www.sthda.com/english/wiki/unpaired-two-samples-wilcoxon-test-in-r
# https://blogs.sas.com/content/iml/2011/04/27/log-transformations-how-to-handle-negative-data-values.html
# https://stats.stackexchange.com/questions/155429/how-to-transform-negative-values-to-logarithms
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1534033/
# https://www.statisticshowto.com/probability-and-statistics/statistics-definitions/parametric-and-non-parametric-data/
# https://www.bmj.com/content/312/7038/1079.full 
# https://stats.stackexchange.com/questions/589920/how-can-i-back-transform-a-log-data-to-interpret-t-test-and-get-original-ci
# https://www.biostars.org/p/16481/
# https://whitlockschluter3e.zoology.ubc.ca/RLabs/R_tutorial_Contingency_analysis.html  
# https://sphweb.bumc.bu.edu/otlt/mph-modules/ep/ep713_randomerror/ep713_randomerror6.html
# https://www.r-bloggers.com/2015/01/easy-error-propagation-in-r/ 
# https://www.biostars.org/p/342756/ 
# https://en.wikipedia.org/wiki/Tukey%27s_range_test 
# https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
# Jukka Vaari 1993 Fysiikan Laboratoriotyöt :)
# https://www.r-bloggers.com/2020/03/how-to-standardize-group-colors-in-data-visualizations-in-r/
# https://stackoverflow.com/questions/71745719/how-to-control-stripe-transparency-using-ggforestplot-geom-stripes
# https://stackoverflow.com/questions/10547487/remove-facet-wrap-labels-completely
# https://stackoverflow.com/questions/24169675/multiple-colors-in-a-facet-strip-background-in-ggplot
# http://www.sthda.com/english/wiki/ggplot2-axis-scales-and-transformations 
# https://www.statology.org/geom_point-fill/
# https://stackoverflow.com/questions/62093084/set-geom-vline-line-types-and-sizes-with-aes-mapping-in-ggplot2
# https://en.wikipedia.org/wiki/Standard_error
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
# https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
# https://jokergoo.github.io/circlize_book/book/advanced-layout.htmlcombine-circular-plots
# https://stackoverflow.com/questions/31943102/rotate-labels-in-a-chorddiagram-r-circlize
# https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
# https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.htmlfigure-sizing
# https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html
# https://bioinformatics.stackexchange.com/questions/22414/when-analysing-microarray-data-is-it-need-to-do-normalization-and-standardizati
# https://bioinformatics.stackexchange.com/questions/22426/inconsistent-microarray-expression-levels-after-normalizing-with-log2
# https://www.reddit.com/r/bioinformatics/comments/1ejs94m/log2_transformation_and_quantile_normalization/
# https://www.statology.org/cohens-d-in-r/
# https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use   
# https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
# https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
# https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
# https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
# https://scales.arabpsychology.com/stats/how-to-remove-the-last-character-from-a-string-in-r-2-examples/
# https://ggplot2.tidyverse.org/reference/geom_smooth.html
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
# https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
# https://r-graph-gallery.com/network.html)
# https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
# https://rdrr.io/cran/mlma/man/data.org.html 
# https://cran.r-project.org/web/packages/mma/mma.pdf
# https://www.statology.org/glm-vs-lm-in-r/
# https://intro2r.com/loops.html 
# https://www.benjaminbell.co.uk/2022/12/loops-in-r-nested-loops.html 
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://topepo.github.io/caret/parallel-processing.html 
# https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html
# https://ds-pl-r-book.netlify.app/optimization-in-r.html
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot 
# https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# https://statisticsglobe.com/change-font-size-corrplot-r
# https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html 
# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
# https://github.com/MarioniLab/miloR
# https://www.nature.com/articles/s41467-023-40458-9/figures/4
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct 
# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
# https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
# https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
# https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
# https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/